From f75c7ac979f45c3f6b40b21c00944bca702200f8 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Mon, 4 Mar 2024 14:29:31 +0100
Subject: [PATCH 01/12] Compile xtask in --release

---
 .cargo/config.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.cargo/config.toml b/.cargo/config.toml
index 35049cbcb..e11d56a31 100644
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -1,2 +1,2 @@
 [alias]
-xtask = "run --package xtask --"
+xtask = "run --release --package xtask --"

From 6862caef648b046c23439ca05bf9a494e6d0fd37 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Mon, 26 Feb 2024 16:37:35 +0100
Subject: [PATCH 02/12] Span Stats compute self-time

---
 tracing-trace/src/processor/span_stats.rs | 92 +++++++++++++++++++++--
 1 file changed, 85 insertions(+), 7 deletions(-)

diff --git a/tracing-trace/src/processor/span_stats.rs b/tracing-trace/src/processor/span_stats.rs
index f3e6238ff..584fe53f8 100644
--- a/tracing-trace/src/processor/span_stats.rs
+++ b/tracing-trace/src/processor/span_stats.rs
@@ -1,4 +1,5 @@
 use std::collections::{BTreeMap, HashMap};
+use std::ops::Range;
 use std::time::Duration;
 
 use serde::{Deserialize, Serialize};
@@ -16,6 +17,51 @@ enum SpanStatus {
 pub struct CallStats {
     pub call_count: usize,
     pub time: u64,
+    pub self_time: u64,
+}
+
+#[derive(Debug, Default)]
+pub struct SelfTime {
+    child_ranges: Vec<Range<Duration>>,
+}
+
+impl SelfTime {
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    pub fn add_child_range(&mut self, child_range: Range<Duration>) {
+        self.child_ranges.push(child_range)
+    }
+
+    pub fn self_duration(&mut self, self_range: Range<Duration>) -> Duration {
+        if self.child_ranges.is_empty() {
+            return self_range.end - self_range.start;
+        }
+
+        // by sorting child ranges by their start time,
+        // we make sure that no child will start before the last one we visited.
+        self.child_ranges
+            .sort_by(|left, right| left.start.cmp(&right.start).then(left.end.cmp(&right.end)));
+        // self duration computed by adding all the segments where the span is not executing a child
+        let mut self_duration = Duration::from_nanos(0);
+
+        // last point in time where we are certain that this span was not executing a child.
+        let mut committed_point = self_range.start;
+
+        for child_range in &self.child_ranges {
+            if child_range.start > committed_point {
+                // we add to the self duration the gap between the end of the latest span and the beginning of the next span
+                self_duration += child_range.start - committed_point;
+            }
+            if committed_point < child_range.end {
+                // then we set ourselves to the end of the latest span
+                committed_point = child_range.end;
+            }
+        }
+
+        self_duration
+    }
 }
 
 pub fn to_call_stats<R: std::io::Read>(
@@ -23,6 +69,9 @@ pub fn to_call_stats<R: std::io::Read>(
 ) -> Result<BTreeMap<String, CallStats>, Error> {
     let mut calls = HashMap::new();
     let mut spans = HashMap::new();
+    let mut last_point = Duration::from_nanos(0);
+    let mut first_point = None;
+    let mut total_self_time = SelfTime::new();
     for entry in trace {
         let entry = entry?;
         match entry {
@@ -31,10 +80,11 @@ pub fn to_call_stats<R: std::io::Read>(
         }
         Entry::NewThread(_) => {}
         Entry::NewSpan(span) => {
-            spans.insert(span.id, (span, SpanStatus::Outside));
+            spans.insert(span.id, (span, SpanStatus::Outside, SelfTime::new()));
         }
         Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => {
-            let (_, status) = spans.get_mut(&id).unwrap();
+            first_point.get_or_insert(time);
+            let (_, status, _) = spans.get_mut(&id).unwrap();
 
             let SpanStatus::Outside = status else {
                 continue;
             };
@@ -43,18 +93,32 @@ pub fn to_call_stats<R: std::io::Read>(
             *status = SpanStatus::Inside(time);
         }
         Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => {
-            let (span, status) = spans.get_mut(&id).unwrap();
+            let (span, status, self_time) = spans.get_mut(&id).unwrap();
 
             let SpanStatus::Inside(begin) = status else {
                 continue;
            };
            let begin = *begin;
 
+            if last_point < end {
+                last_point = end;
+            }
+
             *status = SpanStatus::Outside;
 
+            let self_range = begin..end;
+
+            let self_duration = self_time.self_duration(self_range.clone());
+            *self_time = SelfTime::new();
+
             let span = *span;
+            if let Some(parent_id) = span.parent_id {
+                let (_, _, parent_self_time) = spans.get_mut(&parent_id).unwrap();
+                parent_self_time.add_child_range(self_range.clone())
+            }
+            total_self_time.add_child_range(self_range);
             let (_, call_list) = calls.get_mut(&span.call_id).unwrap();
-            call_list.push(end - begin);
+            call_list.push((end - begin, self_duration));
         }
         Entry::SpanClose(SpanClose { id, time: _ }) => {
             spans.remove(&id);
@@ -63,17 +127,31 @@ pub fn to_call_stats<R: std::io::Read>(
         }
     }
 
+    let total_self_time = first_point
+        .map(|first_point| (first_point, total_self_time.self_duration(first_point..last_point)));
+
     Ok(calls
         .into_iter()
         .map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls)))
+        .chain(total_self_time.map(|(first_point, total_self_time)| {
+            (
+                "::meta::total".to_string(),
+                CallStats {
+                    call_count: 1,
+                    time: (last_point - first_point).as_nanos() as u64,
+                    self_time: total_self_time.as_nanos() as u64,
+                },
+            )
+        }))
         .collect())
 }
 
 fn site_to_string(call_site: NewCallsite) -> String {
     format!("{}::{}", call_site.target, call_site.name)
 }
 
-fn calls_to_stats(calls: Vec<Duration>) -> CallStats {
+fn calls_to_stats(calls: Vec<(Duration, Duration)>) -> CallStats {
     let nb = calls.len();
-    let sum: Duration = calls.iter().sum();
-    CallStats { call_count: nb, time: sum.as_nanos() as u64 }
+    let sum: Duration = calls.iter().map(|(total, _)| total).sum();
+    let self_sum: Duration = calls.iter().map(|(_, self_duration)| self_duration).sum();
+    CallStats { call_count: nb, time: sum.as_nanos() as u64, self_time: self_sum.as_nanos() as u64 }
 }
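To make the sweep above concrete, here is how `self_duration` behaves on a small, made-up example (an illustration, not part of the patch):

    use std::time::Duration;
    let ns = Duration::from_nanos;
    // a span running 0ns..10ns, with children 2..5, 4..7 (overlapping) and 8..9
    let mut time = SelfTime::new();
    time.add_child_range(ns(2)..ns(5));
    time.add_child_range(ns(4)..ns(7));
    time.add_child_range(ns(8)..ns(9));
    // gaps where no child runs: 0..2 and 7..8, so 3ns of self-time; the sweep
    // only accumulates gaps *before* a child, so the 9..10 tail is not counted
    assert_eq!(time.self_duration(ns(0)..ns(10)), ns(3));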
From b11df7ec341ba5987e2be644a008c6b5937666e9 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Mon, 26 Feb 2024 16:38:17 +0100
Subject: [PATCH 03/12] Meilisearch: fix some wrong spans

---
 milli/src/update/index_documents/extract/mod.rs | 3 +--
 milli/src/update/index_documents/mod.rs         | 2 +-
 milli/src/update/index_documents/typed_chunk.rs | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 251a2db99..43f3f4947 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -210,8 +210,7 @@ fn run_extraction_task(
     let current_span = tracing::Span::current();
 
     rayon::spawn(move || {
-        let child_span =
-            tracing::trace_span!(target: "", parent: &current_span, "extract_multiple_chunks");
+        let child_span = tracing::trace_span!(target: "indexing::extract::details", parent: &current_span, "extract_multiple_chunks");
         let _entered = child_span.enter();
         puffin::profile_scope!("extract_multiple_chunks", name);
         match extract_fn(chunk, indexer) {

diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 61ca1a024..7499b68e5 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -284,7 +284,7 @@ where
     #[tracing::instrument(
         level = "trace",
         skip_all,
-        target = "profile::indexing::details",
+        target = "indexing::details",
         name = "index_documents_raw"
     )]
     pub fn execute_raw(self, output: TransformOutput) -> Result<u64>

diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 1fea9a70f..6aad290e5 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -473,7 +473,7 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::FieldIdFacetIsEmptyDocids(_) => {
-            let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids");
+            let span = tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_empty_docids");
             let _entered = span.enter();
 
             let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);

From 86ce843f3d74d9acc02552af17903b6dc3fd4fc1 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Mon, 26 Feb 2024 21:29:20 +0100
Subject: [PATCH 04/12] Add cargo xtask bench

---
 Cargo.lock                  |   57 +-
 xtask/Cargo.toml            |   25 +
 xtask/src/bench/env_info.rs |  129 ++++
 xtask/src/bench/mod.rs      | 1159 +++++++++++++++++++++++++++++++++++
 xtask/src/lib.rs            |    1 +
 xtask/src/main.rs           |    6 +-
 6 files changed, 1370 insertions(+), 7 deletions(-)
 create mode 100644 xtask/src/bench/env_info.rs
 create mode 100644 xtask/src/bench/mod.rs
 create mode 100644 xtask/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index 971ab602a..eca3b2fbc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -440,6 +440,12 @@ dependencies = [
  "syn 2.0.48",
 ]
 
+[[package]]
+name = "atomic"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba"
+
 [[package]]
 name = "atomic-polyfill"
 version = "0.1.11"
@@ -3488,6 +3494,12 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "num-conv"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
+
 [[package]]
 name = "num-integer"
 version = "0.1.45"
@@ -4218,10 +4230,12 @@ dependencies = [
  "system-configuration",
  "tokio",
  "tokio-rustls 0.24.1",
+ "tokio-util",
  "tower-service",
  "url",
"wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "webpki-roots 0.25.3", "winreg", @@ -4934,12 +4948,13 @@ dependencies = [ [[package]] name = "time" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e" +checksum = "fe80ced77cbfb4cb91a94bf72b378b4b6791a0d9b7f09d0be747d1bdff4e68bd" dependencies = [ "deranged", "itoa", + "num-conv", "powerfmt", "serde", "time-core", @@ -4954,10 +4969,11 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f" +checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774" dependencies = [ + "num-conv", "time-core", ] @@ -5395,10 +5411,11 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ + "atomic", "getrandom", "serde", ] @@ -5539,6 +5556,19 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +[[package]] +name = "wasm-streams" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "wav" version = "1.0.0" @@ -5873,8 +5903,23 @@ dependencies = [ name = "xtask" version = "1.7.0" dependencies = [ + "anyhow", "cargo_metadata", "clap", + "futures-core", + "futures-util", + "git2", + "reqwest", + "serde", + "serde_json", + "sha2", + "sysinfo", + "time", + "tokio", + "tracing", + "tracing-subscriber", + "tracing-trace", + "uuid", ] [[package]] diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml index 07271ea09..a59a79e53 100644 --- a/xtask/Cargo.toml +++ b/xtask/Cargo.toml @@ -11,5 +11,30 @@ license.workspace = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +anyhow = "1.0.79" cargo_metadata = "0.18.1" clap = { version = "4.4.14", features = ["derive"] } +futures-core = "0.3.30" +futures-util = "0.3.30" +git2 = { version = "0.16", default_features = false } +reqwest = { version = "0.11.23", features = [ + "stream", + "json", + "rustls-tls", +], default_features = false } +serde = { version = "1.0.195", features = ["derive"] } +serde_json = "1.0.111" +sha2 = "0.10.8" +sysinfo = "0.30.5" +time = { version = "0.3.32", features = ["serde", "serde-human-readable"] } +tokio = { version = "1.35.1", features = [ + "rt", + "net", + "time", + "process", + "signal", +] } +tracing = "0.1.40" +tracing-subscriber = "0.3.18" +tracing-trace = { version = "0.1.0", path = "../tracing-trace" } +uuid = { version = "1.7.0", features = ["v7", "serde"] } diff --git a/xtask/src/bench/env_info.rs b/xtask/src/bench/env_info.rs new file mode 100644 index 000000000..5cbeb4274 --- /dev/null +++ b/xtask/src/bench/env_info.rs @@ -0,0 +1,129 @@ +use 
serde::{Deserialize, Serialize};
+use time::OffsetDateTime;
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Source {
+    pub repo_url: Option<String>,
+    pub branch_or_tag: String,
+    pub commit_id: String,
+    pub commit_msg: String,
+    pub author_name: String,
+    pub author_email: String,
+    pub committer_name: String,
+    pub committer_email: String,
+}
+
+impl Source {
+    pub fn from_repo(
+        path: impl AsRef<std::path::Path>,
+    ) -> Result<(Self, OffsetDateTime), git2::Error> {
+        use git2::Repository;
+
+        let repo = Repository::open(path)?;
+        let remote = repo.remotes()?;
+        let remote = remote.get(0).expect("No remote associated to the repo");
+        let remote = repo.find_remote(remote)?;
+
+        let head = repo.head()?;
+
+        let commit = head.peel_to_commit()?;
+
+        let time = OffsetDateTime::from_unix_timestamp(commit.time().seconds()).unwrap();
+
+        let author = commit.author();
+        let committer = commit.committer();
+
+        Ok((
+            Self {
+                repo_url: remote.url().map(|s| s.to_string()),
+                branch_or_tag: head.name().unwrap().to_string(),
+                commit_id: commit.id().to_string(),
+                commit_msg: String::from_utf8_lossy(commit.message_bytes())
+                    .to_string()
+                    .lines()
+                    .next()
+                    .map_or(String::new(), |s| s.to_string()),
+                author_name: author.name().unwrap().to_string(),
+                author_email: author.email().unwrap().to_string(),
+                committer_name: committer.name().unwrap().to_string(),
+                committer_email: committer.email().unwrap().to_string(),
+            },
+            time,
+        ))
+    }
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Environment {
+    pub hostname: Option<String>,
+    pub cpu: String,
+
+    /// Advertised or nominal clock speed in Hertz.
+    pub clock_speed: u64,
+
+    /// Total number of bytes of memory provided by the system.
+    pub memory: u64,
+    pub os_type: String,
+    pub software: Vec<VersionInfo>,
+
+    pub user_name: String,
+
+    /// Set to true when the data was gathered by a manual run,
+    /// possibly on a developer machine, instead of the usual benchmark server.
+    pub manual_run: bool,
+}
+
+impl Environment {
+    pub fn generate_from_current_config() -> Self {
+        use sysinfo::System;
+
+        let unknown_string = String::from("Unknown");
+        let mut system = System::new();
+        system.refresh_cpu();
+        system.refresh_cpu_frequency();
+        system.refresh_memory();
+
+        let (cpu, frequency) = match system.cpus().first() {
+            Some(cpu) => (
+                format!("{} @ {:.2}GHz", cpu.brand(), cpu.frequency() as f64 / 1000.0),
+                cpu.frequency() * 1_000_000,
+            ),
+            None => (unknown_string.clone(), 0),
+        };
+
+        let mut software = Vec::new();
+        if let Some(distribution) = System::name() {
+            software
+                .push(VersionInfo { name: distribution, version: String::from("distribution") });
+        }
+        if let Some(kernel) = System::kernel_version() {
+            software.push(VersionInfo { name: kernel, version: String::from("kernel") });
+        }
+        if let Some(os) = System::os_version() {
+            software.push(VersionInfo { name: os, version: String::from("kernel-release") });
+        }
+        if let Some(arch) = System::cpu_arch() {
+            software.push(VersionInfo { name: arch, version: String::from("arch") });
+        }
+
+        Self {
+            hostname: System::host_name(),
+            cpu,
+            clock_speed: frequency,
+            memory: system.total_memory(),
+            os_type: System::long_os_version().unwrap_or(unknown_string.clone()),
+            user_name: System::name().unwrap_or(unknown_string.clone()),
+            manual_run: false,
+            software,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct VersionInfo {
+    pub name: String,
+    pub version: String,
+}
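Serialized with the `camelCase` rename above, a `Source` ends up on the wire roughly as follows (a sketch; the values are taken from this patch series or invented, and the emails are elided):

    // hypothetical output of serde_json::to_value(&source)
    let _source = serde_json::json!({
        "repoUrl": "https://github.com/meilisearch/meilisearch",
        "branchOrTag": "refs/heads/main",
        "commitId": "86ce843f3d74d9acc02552af17903b6dc3fd4fc1",
        "commitMsg": "Add cargo xtask bench",
        "authorName": "Louis Dureuil",
        "authorEmail": "…",
        "committerName": "Louis Dureuil",
        "committerEmail": "…"
    });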
diff --git a/xtask/src/bench/mod.rs b/xtask/src/bench/mod.rs
new file mode 100644
index 000000000..ea17b6f69
--- /dev/null
+++ b/xtask/src/bench/mod.rs
@@ -0,0 +1,1159 @@
+mod env_info;
+
+use std::collections::BTreeMap;
+use std::fmt::Display;
+use std::io::{Read, Seek, Write};
+use std::path::PathBuf;
+
+use anyhow::{bail, Context};
+use clap::Parser;
+use futures_util::TryStreamExt;
+use serde::Deserialize;
+use serde_json::json;
+use sha2::Digest;
+use tracing_subscriber::fmt::format::FmtSpan;
+use tracing_subscriber::layer::SubscriberExt;
+use tracing_subscriber::Layer;
+use uuid::Uuid;
+
+pub fn default_http_addr() -> String {
+    "127.0.0.1:7700".to_string()
+}
+
+pub fn default_report_folder() -> String {
+    "./bench/reports/".into()
+}
+
+pub fn default_asset_folder() -> String {
+    "./bench/assets/".into()
+}
+
+pub fn default_log_filter() -> String {
+    "info".into()
+}
+
+pub fn default_dashboard_url() -> String {
+    "http://localhost:9001".into()
+}
+
+#[derive(Debug, Clone)]
+pub struct Client {
+    base_url: Option<String>,
+    client: reqwest::Client,
+}
+
+impl Client {
+    pub fn new(
+        base_url: Option<String>,
+        api_key: Option<&str>,
+        timeout: Option<std::time::Duration>,
+    ) -> anyhow::Result<Self> {
+        let mut headers = reqwest::header::HeaderMap::new();
+        if let Some(api_key) = api_key {
+            headers.append(
+                reqwest::header::AUTHORIZATION,
+                reqwest::header::HeaderValue::from_str(&format!("Bearer {api_key}"))
+                    .context("Invalid authorization header")?,
+            );
+        }
+
+        let client = reqwest::ClientBuilder::new().default_headers(headers);
+        let client = if let Some(timeout) = timeout { client.timeout(timeout) } else { client };
+        let client = client.build()?;
+        Ok(Self { base_url, client })
+    }
+
+    pub fn request(&self, method: reqwest::Method, route: &str) -> reqwest::RequestBuilder {
+        if let Some(base_url) = &self.base_url {
+            if route.is_empty() {
+                self.client.request(method, base_url)
+            } else {
+                self.client.request(method, format!("{}/{}", base_url, route))
+            }
+        } else {
+            self.client.request(method, route)
+        }
+    }
+
+    pub fn get(&self, route: &str) -> reqwest::RequestBuilder {
+        self.request(reqwest::Method::GET, route)
+    }
+
+    pub fn put(&self, route: &str) -> reqwest::RequestBuilder {
+        self.request(reqwest::Method::PUT, route)
+    }
+
+    pub fn post(&self, route: &str) -> reqwest::RequestBuilder {
+        self.request(reqwest::Method::POST, route)
+    }
+
+    pub fn delete(&self, route: &str) -> reqwest::RequestBuilder {
+        self.request(reqwest::Method::DELETE, route)
+    }
+}
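As a usage sketch for the client above (illustration only; the URL and timeout are assumptions), routes resolve against the optional base URL, and an empty route hits the base URL itself:

    // GET http://127.0.0.1:7700/health, with no API key and a 60s timeout
    let client = Client::new(
        Some("http://127.0.0.1:7700".into()),
        None,
        Some(std::time::Duration::from_secs(60)),
    )?;
    let response = client.get("health").send().await?;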
+/// Run benchmarks from a workload
+#[derive(Parser, Debug)]
+pub struct BenchDeriveArgs {
+    /// Filename of the workload file, pass multiple filenames
+    /// to run multiple workloads in the specified order.
+    ///
+    /// Each workload run will get its own report file.
+    #[arg(value_name = "WORKLOAD_FILE", last = false)]
+    workload_file: Vec<PathBuf>,
+
+    /// URL of the dashboard.
+    #[arg(long, default_value_t = default_dashboard_url())]
+    dashboard_url: String,
+
+    /// Directory to output reports.
+    #[arg(long, default_value_t = default_report_folder())]
+    report_folder: String,
+
+    /// Directory to store the remote assets.
+    #[arg(long, default_value_t = default_asset_folder())]
+    asset_folder: String,
+
+    /// Log directives
+    #[arg(short, long, default_value_t = default_log_filter())]
+    log_filter: String,
+
+    /// Benchmark dashboard API key
+    #[arg(long)]
+    api_key: Option<String>,
+
+    /// Meilisearch master key
+    #[arg(long)]
+    master_key: Option<String>,
+
+    /// Authentication bearer for fetching assets
+    #[arg(long)]
+    assets_key: Option<String>,
+
+    /// Reason for the benchmark invocation
+    #[arg(short, long)]
+    reason: Option<String>,
+}
+
+#[derive(Deserialize)]
+pub struct Workload {
+    pub name: String,
+    pub run_count: u16,
+    pub extra_cli_args: Vec<String>,
+    pub assets: BTreeMap<String, Asset>,
+    pub commands: Vec<Command>,
+}
+
+#[derive(Deserialize, Clone)]
+pub struct Asset {
+    pub local_location: Option<String>,
+    pub remote_location: Option<String>,
+    #[serde(default)]
+    pub format: AssetFormat,
+    pub sha256: Option<String>,
+}
+
+#[derive(Deserialize, Default, Copy, Clone)]
+pub enum AssetFormat {
+    #[default]
+    Auto,
+    Json,
+    NdJson,
+    Raw,
+}
+
+impl AssetFormat {
+    fn to_content_type(self, filename: &str) -> &'static str {
+        match self {
+            AssetFormat::Auto => Self::auto_detect(filename).to_content_type(filename),
+            AssetFormat::Json => "application/json",
+            AssetFormat::NdJson => "application/x-ndjson",
+            AssetFormat::Raw => "application/octet-stream",
+        }
+    }
+
+    fn auto_detect(filename: &str) -> Self {
+        let path = std::path::Path::new(filename);
+        match path.extension().and_then(|extension| extension.to_str()) {
+            Some(extension) if extension.eq_ignore_ascii_case("json") => Self::Json,
+            Some(extension) if extension.eq_ignore_ascii_case("ndjson") => Self::NdJson,
+            extension => {
+                tracing::warn!(asset = filename, ?extension, "asset has format `Auto`, but extension was not recognized. Specify `Raw` format to suppress this warning.");
+                AssetFormat::Raw
+            }
+        }
+    }
+}
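For orientation, a workload file that deserializes into the structures above could look like this (a made-up example, shown with serde_json's `json!` macro; none of these values ship with the patch):

    // hypothetical workloads/movies.json
    let _workload = serde_json::json!({
        "name": "movies.add_documents",
        "run_count": 3,
        "extra_cli_args": [],
        "assets": {
            "movies.json": {
                "remote_location": "https://example.org/movies.json",
                "sha256": "…"
            }
        },
        "commands": [{
            "route": "indexes/movies/documents",
            "method": "POST",
            "body": { "asset": "movies.json" },
            "synchronous": "WaitForTask"
        }]
    });

With the `--release` alias from the first patch, such a file would then be run with something along the lines of `cargo xtask bench workloads/movies.json`.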
+#[derive(Clone, Deserialize)]
+pub struct Command {
+    pub route: String,
+    pub method: Method,
+    #[serde(default)]
+    pub body: Body,
+    #[serde(default)]
+    pub synchronous: SyncMode,
+}
+
+#[derive(Default, Clone, Deserialize)]
+#[serde(untagged)]
+pub enum Body {
+    Inline {
+        inline: serde_json::Value,
+    },
+    Asset {
+        asset: String,
+    },
+    #[default]
+    Empty,
+}
+
+impl Body {
+    pub fn get(
+        self,
+        assets: &BTreeMap<String, Asset>,
+        asset_folder: &str,
+    ) -> anyhow::Result<Option<(Vec<u8>, &'static str)>> {
+        Ok(match self {
+            Body::Inline { inline: body } => Some((
+                serde_json::to_vec(&body)
+                    .context("serializing to bytes")
+                    .context("while getting inline body")?,
+                "application/json",
+            )),
+            Body::Asset { asset: name } => Some({
+                let context = || format!("while getting body from asset '{name}'");
+                let (mut file, format) =
+                    fetch_asset(&name, assets, asset_folder).with_context(context)?;
+                let mut buf = Vec::new();
+                file.read_to_end(&mut buf).with_context(context)?;
+                (buf, format.to_content_type(&name))
+            }),
+            Body::Empty => None,
+        })
+    }
+}
+
+fn fetch_asset(
+    name: &str,
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<(std::fs::File, AssetFormat)> {
+    let asset =
+        assets.get(name).with_context(|| format!("could not find asset with name '{name}'"))?;
+    let filename = if let Some(local_filename) = &asset.local_location {
+        local_filename.clone()
+    } else {
+        format!("{asset_folder}/{name}")
+    };
+
+    Ok((
+        std::fs::File::open(&filename)
+            .with_context(|| format!("could not open asset '{name}' at '{filename}'"))?,
+        asset.format,
+    ))
+}
+
+impl Display for Command {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?} {} ({:?})", self.method, self.route, self.synchronous)
+    }
+}
+
+#[derive(Debug, Clone, Copy, Deserialize)]
+pub enum Method {
+    GET,
+    POST,
+    PATCH,
+    DELETE,
+    PUT,
+}
+
+impl From<Method> for reqwest::Method {
+    fn from(value: Method) -> Self {
+        match value {
+            Method::GET => Self::GET,
+            Method::POST => Self::POST,
+            Method::PATCH => Self::PATCH,
+            Method::DELETE => Self::DELETE,
+            Method::PUT => Self::PUT,
+        }
+    }
+}
+
+#[derive(Default, Debug, Clone, Copy, Deserialize)]
+pub enum SyncMode {
+    DontWait,
+    #[default]
+    WaitForResponse,
+    WaitForTask,
+}
+
+pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
+    let filter: tracing_subscriber::filter::Targets =
+        args.log_filter.parse().context("invalid --log-filter")?;
+
+    let env = env_info::Environment::generate_from_current_config();
+    let (source, commit_date) =
+        env_info::Source::from_repo(".").context("could not get repository information")?;
+
+    let subscriber = tracing_subscriber::registry().with(
+        tracing_subscriber::fmt::layer()
+            .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
+            .with_filter(filter),
+    );
+    tracing::subscriber::set_global_default(subscriber).context("could not setup logging")?;
+
+    let rt = tokio::runtime::Builder::new_current_thread().enable_io().enable_time().build()?;
+    let _scope = rt.enter();
+
+    let assets_client =
+        Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h
+
+    let dashboard_client = Client::new(
+        Some(format!("{}/api/v1", args.dashboard_url)),
+        args.api_key.as_deref(),
+        Some(std::time::Duration::from_secs(60)),
+    )?;
+
+    // reporting uses its own client because keeping the stream open to wait for entries
+    // blocks any other requests
+    // Also we don't want any pesky timeout because we don't know how
much time it will take to recover the full trace + let logs_client = Client::new( + Some("http://127.0.0.1:7700/logs/stream".into()), + args.master_key.as_deref(), + None, + )?; + + let meili_client = Client::new( + Some("http://127.0.0.1:7700".into()), + args.master_key.as_deref(), + Some(std::time::Duration::from_secs(60)), + )?; + + rt.block_on(async { + let response = dashboard_client + .put("machine") + .json(&json!({"hostname": env.hostname})) + .send() + .await + .context("sending machine information")?; + if !response.status().is_success() { + bail!( + "could not send machine information: {} {}", + response.status(), + response.text().await.unwrap_or_else(|_| "unknown".into()) + ); + } + + let commit_message = source.commit_msg.split('\n').next().unwrap(); + let max_workloads = args.workload_file.len(); + let reason: Option<&str> = args.reason.as_deref(); + let response = dashboard_client + .put("invocation") + .json(&json!({ + "commit": { + "sha1": source.commit_id, + "message": commit_message, + "commit_date": commit_date, + "branch": source.branch_or_tag + }, + "machine_hostname": env.hostname, + "max_workloads": max_workloads, + "reason": reason + })) + .send() + .await + .context("sending invocation")?; + + if !response.status().is_success() { + bail!( + "could not send new invocation: {}", + response.text().await.unwrap_or_else(|_| "unknown".into()) + ); + } + + let invocation_uuid: Uuid = + response.json().await.context("could not deserialize invocation response as JSON")?; + + + + tracing::info!(workload_count = args.workload_file.len(), "handling workload files"); + let workload_runs = tokio::spawn( + { + let dashboard_client = dashboard_client.clone(); + async move { + for workload_file in args.workload_file.iter() { + let workload: Workload = serde_json::from_reader( + std::fs::File::open(workload_file) + .with_context(|| format!("error opening {}", workload_file.display()))?, + ) + .with_context(|| format!("error parsing {} as JSON", workload_file.display()))?; + + run_workload( + &assets_client, + &dashboard_client, + &logs_client, + &meili_client, + invocation_uuid, + args.master_key.as_deref(), + workload, + &args, + ) + .await?; + } + Ok::<(), anyhow::Error>(()) + }}); + + let abort_handle = workload_runs.abort_handle(); + + tokio::spawn({ + let dashboard_client = dashboard_client.clone(); + async move { + tracing::info!("press Ctrl-C to cancel the invocation"); + match tokio::signal::ctrl_c().await { + Ok(()) => { + tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation"); + mark_as_failed(dashboard_client, invocation_uuid, None).await; + abort_handle.abort(); + } + Err(error) => tracing::warn!( + error = &error as &dyn std::error::Error, + "failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C" + ), + } + } + }); + + match workload_runs.await { + Ok(Ok(_)) => { + tracing::info!("Success"); + Ok::<(), anyhow::Error>(()) + } + Ok(Err(error)) => { + tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard"); + mark_as_failed(dashboard_client, invocation_uuid, Some(error.to_string())).await; + tracing::warn!(%invocation_uuid, "invocation marked as failed following error"); + Err(error) + }, + Err(join_error) => { + match join_error.try_into_panic() { + Ok(panic) => { + tracing::error!("invocation panicked, attempting to report the failure to dashboard"); + mark_as_failed(dashboard_client, invocation_uuid, Some("Panicked".into())).await; + std::panic::resume_unwind(panic) + 
+                    }
+                    Err(_) => {
+                        tracing::warn!("task was canceled");
+                        Ok(())
+                    }
+                }
+            },
+        }
+
+    })?;
+
+    Ok(())
+}
+
+async fn mark_as_failed(
+    dashboard_client: Client,
+    invocation_uuid: Uuid,
+    failure_reason: Option<String>,
+) {
+    let response = dashboard_client
+        .post("cancel-invocation")
+        .json(&json!({
+            "invocation_uuid": invocation_uuid,
+            "failure_reason": failure_reason,
+        }))
+        .send()
+        .await;
+    let response = match response {
+        Ok(response) => response,
+        Err(response_error) => {
+            tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
+            return;
+        }
+    };
+
+    if !response.status().is_success() {
+        tracing::error!(
+            %invocation_uuid,
+            "could not mark invocation as failed: {}",
+            response.text().await.unwrap()
+        );
+        return;
+    }
+    tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
+}
+
+#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
+#[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))]
+async fn run_workload(
+    assets_client: &Client,
+    dashboard_client: &Client,
+    logs_client: &Client,
+    meili_client: &Client,
+    invocation_uuid: Uuid,
+    master_key: Option<&str>,
+    workload: Workload,
+    args: &BenchDeriveArgs,
+) -> anyhow::Result<()> {
+    fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
+
+    let response = dashboard_client
+        .put("workload")
+        .json(&json!({
+            "invocation_uuid": invocation_uuid,
+            "name": &workload.name,
+            "max_runs": workload.run_count,
+        }))
+        .send()
+        .await
+        .context("could not create new workload")?;
+
+    if !response.status().is_success() {
+        bail!("creating new workload failed: {}", response.text().await.unwrap())
+    }
+
+    let workload_uuid: Uuid =
+        response.json().await.context("could not deserialize JSON as UUID")?;
+
+    let mut tasks = Vec::new();
+
+    for i in 0..workload.run_count {
+        tasks.push(
+            run_workload_run(
+                dashboard_client,
+                logs_client,
+                meili_client,
+                workload_uuid,
+                master_key,
+                &workload,
+                args,
+                i,
+            )
+            .await?,
+        );
+    }
+
+    let mut reports = Vec::with_capacity(workload.run_count as usize);
+
+    for task in tasks {
+        reports.push(
+            task.await
+                .context("task panicked while processing report")?
+                .context("task failed while processing report")?,
+        );
+    }
+
+    tracing::info!(workload = workload.name, "Successful workload");
+
+    Ok(())
+}
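To make the resolution order concrete: given an assets entry like the one below (values invented), `fetch_assets` first tries `local_location`, then `{asset_folder}/{name}` in the asset store, and only then downloads `remote_location`, checking the sha256 at every step:

    // hypothetical entry in a workload's `assets` map
    let _assets = serde_json::json!({
        "movies.json": {
            "local_location": "datasets/movies.json",
            "remote_location": "https://example.org/movies.json",
            "sha256": "…"
        }
    });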
+#[tracing::instrument(skip(client, assets), fields(asset_count = assets.len()))]
+async fn fetch_assets(
+    client: &Client,
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<()> {
+    let mut download_tasks = tokio::task::JoinSet::new();
+    for (name, asset) in assets {
+        // trying local
+        if let Some(local) = &asset.local_location {
+            match std::fs::File::open(local) {
+                Ok(file) => {
+                    if check_sha256(name, asset, file)? {
+                        continue;
+                    } else {
+                        tracing::warn!(asset = name, file = local, "found local resource for asset but hash differed, skipping to asset store");
+                    }
+                }
+                Err(error) => match error.kind() {
+                    std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
+                    }
+                    _ => tracing::warn!(
+                        error = &error as &dyn std::error::Error,
+                        "error checking local resource, skipping to asset store"
+                    ),
+                },
+            }
+        }
+
+        // checking asset store
+        let store_filename = format!("{}/{}", asset_folder, name);
+
+        match std::fs::File::open(&store_filename) {
+            Ok(file) => {
+                if check_sha256(name, asset, file)? {
+                    continue;
+                } else {
+                    tracing::warn!(asset = name, file = store_filename, "found resource for asset in asset store, but hash differed, skipping to remote method");
+                }
+            }
+            Err(error) => match error.kind() {
+                std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
+                }
+                _ => tracing::warn!(
+                    error = &error as &dyn std::error::Error,
+                    "error checking resource in store, skipping to remote method"
+                ),
+            },
+        }
+
+        // downloading remote
+        match &asset.remote_location {
+            Some(location) => {
+                std::fs::create_dir_all(asset_folder).with_context(|| format!("could not create asset folder at {asset_folder}"))?;
+                download_tasks.spawn({
+                    let client = client.clone();
+                    let name = name.to_string();
+                    let location = location.to_string();
+                    let store_filename = store_filename.clone();
+                    let asset = asset.clone();
+                    download_asset(client, name, asset, location, store_filename)});
+            },
+            None => bail!("asset {name} has no remote location, but was not found locally or in the asset store"),
+        }
+    }
+
+    while let Some(res) = download_tasks.join_next().await {
+        res.context("download task panicked")?.context("download task failed")?;
+    }
+
+    Ok(())
+}
+
+fn check_sha256(name: &str, asset: &Asset, mut file: std::fs::File) -> anyhow::Result<bool> {
+    let mut bytes = Vec::new();
+    file.read_to_end(&mut bytes).with_context(|| format!("hashing file for asset {name}"))?;
+    let mut file_hash = sha2::Sha256::new();
+    file_hash.update(&bytes);
+    let file_hash = file_hash.finalize();
+    let file_hash = format!("{:x}", file_hash);
+    tracing::debug!(hash = file_hash, "hashed local file");
+
+    Ok(match &asset.sha256 {
+        Some(hash) => {
+            tracing::debug!(hash, "hash from workload");
+            if hash.to_ascii_lowercase() == file_hash {
+                true
+            } else {
+                tracing::warn!(
+                    file_hash,
+                    asset_hash = hash.to_ascii_lowercase(),
+                    "hashes don't match"
+                );
+                false
+            }
+        }
+        None => {
+            tracing::warn!(sha256 = file_hash, "Skipping hash for asset {name} that doesn't have one. Please add it to workload file");
+            true
+        }
+    })
+}
+
+#[tracing::instrument(skip(client, asset, name), fields(asset = name))]
+async fn download_asset(
+    client: Client,
+    name: String,
+    asset: Asset,
+    src: String,
+    dest_filename: String,
+) -> anyhow::Result<()> {
+    let context = || format!("failure downloading asset {name} from {src}");
+
+    let response = client.get(&src).send().await.with_context(context)?;
+
+    let file = std::fs::File::options()
+        .create(true)
+        .truncate(true)
+        .write(true)
+        .read(true)
+        .open(&dest_filename)
+        .with_context(|| format!("creating destination file {dest_filename}"))
+        .with_context(context)?;
+
+    let mut dest = std::io::BufWriter::new(
+        file.try_clone().context("cloning I/O handle").with_context(context)?,
+    );
+
+    let total_len: Option<u64> = response
+        .headers()
+        .get(reqwest::header::CONTENT_LENGTH)
+        .and_then(|value| value.to_str().ok())
+        .and_then(|value| value.parse().ok());
+
+    let progress = tokio::spawn({
+        let name = name.clone();
+        async move {
+            loop {
+                match file.metadata().context("could not get file metadata") {
+                    Ok(metadata) => {
+                        let len = metadata.len();
+                        tracing::info!(
+                            asset = name,
+                            downloaded_bytes = len,
+                            total_bytes = total_len,
+                            "asset download in progress"
+                        );
+                    }
+                    Err(error) => {
+                        tracing::warn!(%error, "could not get file metadata");
+                    }
+                }
+                tokio::time::sleep(std::time::Duration::from_secs(60)).await;
+            }
+        }
+    });
+
+    let writing_context = || format!("while writing to destination file at {dest_filename}");
+
+    let mut response = response.bytes_stream();
+
+    while let Some(bytes) =
+        response.try_next().await.context("while downloading file").with_context(context)?
+    {
+        dest.write_all(&bytes).with_context(writing_context).with_context(context)?;
+    }
+
+    progress.abort();
+
+    let mut file = dest.into_inner().with_context(writing_context).with_context(context)?;
+
+    file.rewind().context("while rewinding asset file")?;
+
+    if !check_sha256(&name, &asset, file)? {
+        bail!("asset '{name}': sha256 mismatch for file {dest_filename} downloaded from {src}")
+    }
+
+    Ok(())
+}
+
+#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
+#[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))]
+async fn run_workload_run(
+    dashboard_client: &Client,
+    logs_client: &Client,
+    meili_client: &Client,
+    workload_uuid: Uuid,
+    master_key: Option<&str>,
+    workload: &Workload,
+    args: &BenchDeriveArgs,
+    run_number: u16,
+) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
+    delete_db();
+    build_meilisearch().await?;
+
+    let meilisearch =
+        start_meilisearch(meili_client, master_key, workload, &args.asset_folder).await?;
+
+    let processor = run_commands(
+        dashboard_client,
+        logs_client,
+        meili_client,
+        workload_uuid,
+        workload,
+        args,
+        run_number,
+    )
+    .await?;
+
+    kill_meilisearch(meilisearch).await;
+
+    tracing::info!(run_number, "Successful run");
+
+    Ok(processor)
+}
+
+async fn kill_meilisearch(mut meilisearch: tokio::process::Child) {
+    if let Err(error) = meilisearch.kill().await {
+        tracing::warn!(
+            error = &error as &dyn std::error::Error,
+            "while terminating Meilisearch server"
+        )
+    }
+}
+
+#[tracing::instrument]
+async fn build_meilisearch() -> anyhow::Result<()> {
+    let mut command = tokio::process::Command::new("cargo");
+    command.arg("build").arg("--release").arg("-p").arg("meilisearch");
+
+    command.kill_on_drop(true);
+
+    let mut builder = command.spawn().context("error building Meilisearch")?;
+
+    if !builder.wait().await.context("could not build Meilisearch")?.success() {
+        bail!("failed building Meilisearch")
+    }
+
+    Ok(())
+}
+
+#[tracing::instrument(skip(client, master_key, workload), fields(workload = workload.name))]
+async fn start_meilisearch(
+    client: &Client,
+    master_key: Option<&str>,
+    workload: &Workload,
+    asset_folder: &str,
+) -> anyhow::Result<tokio::process::Child> {
+    let mut command = tokio::process::Command::new("cargo");
+    command
+        .arg("run")
+        .arg("--release")
+        .arg("-p")
+        .arg("meilisearch")
+        .arg("--bin")
+        .arg("meilisearch")
+        .arg("--");
+
+    command.arg("--db-path").arg("./_xtask_benchmark.ms");
+    if let Some(master_key) = master_key {
+        command.arg("--master-key").arg(master_key);
+    }
+    command.arg("--experimental-enable-logs-route");
+
+    for extra_arg in workload.extra_cli_args.iter() {
+        command.arg(extra_arg);
+    }
+
+    command.kill_on_drop(true);
+
+    let mut meilisearch = command.spawn().context("Error starting Meilisearch")?;
+
+    wait_for_health(client, &mut meilisearch, &workload.assets, asset_folder).await?;
+
+    Ok(meilisearch)
+}
+
+async fn wait_for_health(
+    client: &Client,
+    meilisearch: &mut tokio::process::Child,
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<()> {
+    for i in 0..100 {
+        let res = run_command(client.clone(), health_command(), assets, asset_folder).await;
+        if res.is_ok() {
+            // check that this is actually the current Meilisearch instance that answered us
+            if let Some(exit_code) =
+                meilisearch.try_wait().context("cannot check Meilisearch server process status")?
+            {
+                tracing::error!("Got a health response from a different process");
+                bail!("Meilisearch server exited early with code {exit_code}");
+            }
+
+            return Ok(());
+        }
+        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
+        // check whether the Meilisearch instance exited early (cut the wait)
+        if let Some(exit_code) =
+            meilisearch.try_wait().context("cannot check Meilisearch server process status")?
+        {
+            bail!("Meilisearch server exited early with code {exit_code}");
+        }
+        tracing::debug!(attempt = i, "Waiting for Meilisearch to go up");
+    }
+    bail!("meilisearch is not responding")
+}
+
+fn health_command() -> Command {
+    Command {
+        route: "/health".into(),
+        method: Method::GET,
+        body: Default::default(),
+        synchronous: SyncMode::WaitForResponse,
+    }
+}
+
+fn delete_db() {
+    let _ = std::fs::remove_dir_all("./_xtask_benchmark.ms");
+}
+ { + bail!("Meilisearch server exited early with code {exit_code}"); + } + tracing::debug!(attempt = i, "Waiting for Meilisearch to go up"); + } + bail!("meilisearch is not responding") +} + +fn health_command() -> Command { + Command { + route: "/health".into(), + method: Method::GET, + body: Default::default(), + synchronous: SyncMode::WaitForResponse, + } +} + +fn delete_db() { + let _ = std::fs::remove_dir_all("./_xtask_benchmark.ms"); +} + +async fn run_commands( + dashboard_client: &Client, + logs_client: &Client, + meili_client: &Client, + workload_uuid: Uuid, + workload: &Workload, + args: &BenchDeriveArgs, + run_number: u16, +) -> anyhow::Result>> { + let report_folder = &args.report_folder; + let workload_name = &workload.name; + + std::fs::create_dir_all(report_folder) + .with_context(|| format!("could not create report directory at {report_folder}"))?; + + let trace_filename = format!("{report_folder}/{workload_name}-{run_number}-trace.json"); + let report_filename = format!("{report_folder}/{workload_name}-{run_number}-report.json"); + + let report_handle = start_report(logs_client, trace_filename).await?; + + for batch in workload + .commands + .as_slice() + .split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait)) + { + run_batch(meili_client, batch, &workload.assets, &args.asset_folder).await?; + } + + let processor = + stop_report(dashboard_client, logs_client, workload_uuid, report_filename, report_handle) + .await?; + + Ok(processor) +} + +async fn stop_report( + dashboard_client: &Client, + logs_client: &Client, + workload_uuid: Uuid, + filename: String, + report_handle: tokio::task::JoinHandle>, +) -> anyhow::Result>> { + let response = logs_client.delete("").send().await.context("while stopping report")?; + if !response.status().is_success() { + bail!("received HTTP {} while stopping report", response.status()) + } + + let mut file = tokio::time::timeout(std::time::Duration::from_secs(1000), report_handle) + .await + .context("while waiting for the end of the report")? + .context("report writing task panicked")? 
+ .context("while writing report")?; + + file.rewind().context("while rewinding report file")?; + + let process_handle = tokio::task::spawn({ + let dashboard_client = dashboard_client.clone(); + async move { + let span = tracing::info_span!("processing trace to report", filename); + let _guard = span.enter(); + let report = tracing_trace::processor::span_stats::to_call_stats( + tracing_trace::TraceReader::new(std::io::BufReader::new(file)), + ) + .context("could not convert trace to report")?; + let context = || format!("writing report to {filename}"); + + let response = dashboard_client + .put("run") + .json(&json!({ + "workload_uuid": workload_uuid, + "data": report + })) + .send() + .await + .context("sending new run")?; + + if !response.status().is_success() { + bail!( + "sending new run failed: {}", + response.text().await.unwrap_or_else(|_| "unknown".into()) + ) + } + + let mut output_file = std::io::BufWriter::new( + std::fs::File::options() + .create(true) + .truncate(true) + .write(true) + .read(true) + .open(&filename) + .with_context(context)?, + ); + + for (key, value) in report { + serde_json::to_writer(&mut output_file, &json!({key: value})) + .context("serializing span stat")?; + writeln!(&mut output_file).with_context(context)?; + } + output_file.flush().with_context(context)?; + let mut output_file = output_file.into_inner().with_context(context)?; + + output_file.rewind().context("could not rewind output_file").with_context(context)?; + + tracing::info!("success"); + Ok(output_file) + } + }); + + Ok(process_handle) +} + +async fn start_report( + logs_client: &Client, + filename: String, +) -> anyhow::Result>> { + let report_file = std::fs::File::options() + .create(true) + .truncate(true) + .write(true) + .read(true) + .open(&filename) + .with_context(|| format!("could not create file at {filename}"))?; + let mut report_file = std::io::BufWriter::new(report_file); + + let response = logs_client + .post("") + .json(&json!({ + "mode": "profile", + "target": "indexing::=trace" + })) + .send() + .await + .context("failed to start report")?; + + let code = response.status(); + if code.is_client_error() { + tracing::error!(%code, "request error when trying to start report"); + let response: serde_json::Value = response + .json() + .await + .context("could not deserialize response as JSON") + .context("response error when trying to start report")?; + bail!( + "request error when trying to start report: server responded with error code {code} and '{response}'" + ) + } else if code.is_server_error() { + tracing::error!(%code, "server error when trying to start report"); + let response: serde_json::Value = response + .json() + .await + .context("could not deserialize response as JSON") + .context("response error trying to start report")?; + bail!("server error when trying to start report: server responded with error code {code} and '{response}'") + } + + Ok(tokio::task::spawn(async move { + let mut stream = response.bytes_stream(); + while let Some(bytes) = stream.try_next().await.context("while waiting for report")? 
+async fn run_batch(
+    client: &Client,
+    batch: &[Command],
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<()> {
+    let [.., last] = batch else { return Ok(()) };
+    let sync = last.synchronous;
+
+    let mut tasks = tokio::task::JoinSet::new();
+
+    for command in batch {
+        // FIXME: you probably don't want to copy assets every time here
+        tasks.spawn({
+            let client = client.clone();
+            let command = command.clone();
+            let assets = assets.clone();
+            let asset_folder = asset_folder.to_owned();
+
+            async move { run_command(client, command, &assets, &asset_folder).await }
+        });
+    }
+
+    while let Some(result) = tasks.join_next().await {
+        result
+            .context("panicked while executing command")?
+            .context("error while executing command")?;
+    }
+
+    match sync {
+        SyncMode::DontWait => {}
+        SyncMode::WaitForResponse => {}
+        SyncMode::WaitForTask => wait_for_tasks(client).await?,
+    }
+
+    Ok(())
+}
+
+async fn wait_for_tasks(client: &Client) -> anyhow::Result<()> {
+    loop {
+        let response = client
+            .get("tasks?statuses=enqueued,processing")
+            .send()
+            .await
+            .context("could not wait for tasks")?;
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response to JSON")
+            .context("could not wait for tasks")?;
+        match response.get("total") {
+            Some(serde_json::Value::Number(number)) => {
+                let number = number.as_u64().with_context(|| {
+                    format!("waiting for tasks: could not parse 'total' as integer, got {}", number)
+                })?;
+                if number == 0 {
+                    break;
+                } else {
+                    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+                    continue;
+                }
+            }
+            Some(thing_else) => {
+                bail!(format!(
+                    "waiting for tasks: could not parse 'total' as a number, got '{thing_else}'"
+                ))
+            }
+            None => {
+                bail!(format!(
+                    "waiting for tasks: expected response to contain 'total', got '{response}'"
+                ))
+            }
+        }
+    }
+    Ok(())
+}
+
+#[tracing::instrument(skip(client, command, assets, asset_folder), fields(command = %command))]
+async fn run_command(
+    client: Client,
+    mut command: Command,
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<()> {
+    // `mem::take` the body here to leave an empty body in its place, so that command is not partially moved-out
+    let body = std::mem::take(&mut command.body)
+        .get(assets, asset_folder)
+        .with_context(|| format!("while getting body for command {command}"))?;
+
+    let request = client.request(command.method.into(), &command.route);
+
+    let request = if let Some((body, content_type)) = body {
+        request.body(body).header(reqwest::header::CONTENT_TYPE, content_type)
+    } else {
+        request
+    };
+
+    let response =
+        request.send().await.with_context(|| format!("error sending command: {}", command))?;
+
+    let code = response.status();
+    if code.is_client_error() {
+        tracing::error!(%command, %code, "error in workload file");
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response as JSON")
+            .context("parsing error in workload file when sending command")?;
+        bail!("error in workload file: server responded with error code {code} and '{response}'")
+    } else if code.is_server_error() {
+        tracing::error!(%command, %code, "server error");
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response as JSON")
+            .context("parsing
server error when sending command")?; + bail!("server error: server responded with error code {code} and '{response}'") + } + + Ok(()) +} diff --git a/xtask/src/lib.rs b/xtask/src/lib.rs new file mode 100644 index 000000000..cbda260db --- /dev/null +++ b/xtask/src/lib.rs @@ -0,0 +1 @@ +pub mod bench; diff --git a/xtask/src/main.rs b/xtask/src/main.rs index 6570dc67b..b81424666 100644 --- a/xtask/src/main.rs +++ b/xtask/src/main.rs @@ -1,6 +1,7 @@ use std::collections::HashSet; use clap::Parser; +use xtask::bench::BenchDeriveArgs; /// List features available in the workspace #[derive(Parser, Debug)] @@ -17,13 +18,16 @@ struct ListFeaturesDeriveArgs { #[command(bin_name = "cargo xtask")] enum Command { ListFeatures(ListFeaturesDeriveArgs), + Bench(BenchDeriveArgs), } -fn main() { +fn main() -> anyhow::Result<()> { let args = Command::parse(); match args { Command::ListFeatures(args) => list_features(args), + Command::Bench(args) => xtask::bench::run(args)?, } + Ok(()) } fn list_features(args: ListFeaturesDeriveArgs) { From c608b3f9b5fa037254f7bbbaa1a2e3298087f664 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 27 Feb 2024 18:34:52 +0100 Subject: [PATCH 05/12] Factor vergen stuff to a build-info crate --- Cargo.lock | 149 +++++++++---- Cargo.toml | 2 +- Dockerfile | 2 +- build-info/Cargo.toml | 18 ++ build-info/build.rs | 22 ++ build-info/src/lib.rs | 203 ++++++++++++++++++ meilisearch/Cargo.toml | 2 +- meilisearch/build.rs | 13 -- .../src/analytics/segment_analytics.rs | 4 +- meilisearch/src/lib.rs | 27 --- meilisearch/src/main.rs | 22 +- meilisearch/src/routes/mod.rs | 14 +- xtask/Cargo.toml | 11 +- xtask/src/bench/env_info.rs | 54 ----- xtask/src/bench/mod.rs | 12 +- 15 files changed, 396 insertions(+), 159 deletions(-) create mode 100644 build-info/Cargo.toml create mode 100644 build-info/build.rs create mode 100644 build-info/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index eca3b2fbc..700bb2653 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -356,9 +356,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.79" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" dependencies = [ "backtrace", ] @@ -628,6 +628,15 @@ dependencies = [ "serde", ] +[[package]] +name = "build-info" +version = "1.7.0" +dependencies = [ + "anyhow", + "time", + "vergen-git2", +] + [[package]] name = "bumpalo" version = "3.13.0" @@ -1348,7 +1357,16 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" dependencies = [ - "derive_builder_macro", + "derive_builder_macro 0.12.0", +] + +[[package]] +name = "derive_builder" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f59169f400d8087f238c5c0c7db6a28af18681717f3b623227d92f397e938c7" +dependencies = [ + "derive_builder_macro 0.13.1", ] [[package]] @@ -1363,13 +1381,35 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_builder_core" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4ec317cc3e7ef0928b0ca6e4a634a4d6c001672ae210438cf114a83e56b018d" +dependencies = [ + "darling 0.14.4", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "derive_builder_macro" version = "0.12.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" dependencies = [ - "derive_builder_core", + "derive_builder_core 0.12.0", + "syn 1.0.109", +] + +[[package]] +name = "derive_builder_macro" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "870368c3fb35b8031abb378861d4460f573b92238ec2152c927a21f77e3e0127" +dependencies = [ + "derive_builder_core 0.13.1", "syn 1.0.109", ] @@ -2088,11 +2128,11 @@ checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" [[package]] name = "git2" -version = "0.16.1" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf7f68c2995f392c49fffb4f95ae2c873297830eb25c6bc4c114ce8f4562acc" +checksum = "1b3ba52851e73b46a4c3df1d89343741112003f0f6f13beb0dfac9e457c3fdcd" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.1", "libc", "libgit2-sys", "log", @@ -2389,7 +2429,7 @@ dependencies = [ "bincode", "crossbeam", "csv", - "derive_builder", + "derive_builder 0.12.0", "dump", "enum-iterator", "file-store", @@ -2506,7 +2546,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" dependencies = [ "hermit-abi", - "rustix 0.38.26", + "rustix 0.38.31", "windows-sys 0.52.0", ] @@ -2628,15 +2668,15 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.150" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libgit2-sys" -version = "0.14.2+1.5.1" +version = "0.16.2+1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4" +checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" dependencies = [ "cc", "libc", @@ -2683,9 +2723,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.12" +version = "1.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b" +checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" dependencies = [ "cc", "libc", @@ -3122,6 +3162,7 @@ dependencies = [ "async-trait", "brotli", "bstr", + "build-info", "byte-unit", "bytes", "cargo_toml", @@ -3193,7 +3234,6 @@ dependencies = [ "url", "urlencoding", "uuid", - "vergen", "walkdir", "yaup", "zip", @@ -3530,6 +3570,15 @@ dependencies = [ "libc", ] +[[package]] +name = "num_threads" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9" +dependencies = [ + "libc", +] + [[package]] name = "number_prefix" version = "0.4.0" @@ -4144,15 +4193,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_users" version = "0.4.3" @@ -4343,9 +4383,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.26" +version = "0.38.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a" +checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" dependencies = [ "bitflags 2.4.1", "errno", @@ -4881,14 +4921,13 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.9.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", - "redox_syscall 0.4.1", - "rustix 0.38.26", + "rustix 0.38.31", "windows-sys 0.52.0", ] @@ -4948,13 +4987,15 @@ dependencies = [ [[package]] name = "time" -version = "0.3.32" +version = "0.3.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe80ced77cbfb4cb91a94bf72b378b4b6791a0d9b7f09d0be747d1bdff4e68bd" +checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749" dependencies = [ "deranged", "itoa", + "libc", "num-conv", + "num_threads", "powerfmt", "serde", "time-core", @@ -5008,7 +5049,7 @@ version = "0.14.1" source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.14.1#6357206cdcce4d78ffb1e0372feb456caea09375" dependencies = [ "aho-corasick", - "derive_builder", + "derive_builder 0.12.0", "esaxx-rs", "getrandom", "itertools 0.11.0", @@ -5434,18 +5475,42 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" [[package]] name = "vergen" -version = "7.5.1" +version = "9.0.0-beta.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f21b881cd6636ece9735721cf03c1fe1e774fe258683d084bb2812ab67435749" +checksum = "107dc53b443fe8cc380798abb75ad6b7038281165109afea1f1b28bb47047ed5" dependencies = [ "anyhow", - "cfg-if", - "enum-iterator", + "derive_builder 0.13.1", "getset", + "rustversion", + "vergen-lib", +] + +[[package]] +name = "vergen-git2" +version = "1.0.0-beta.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8875c5d71074bb67118774e3d795ab6fe77c3ae3161cb54e19104cabc49487f1" +dependencies = [ + "anyhow", + "derive_builder 0.13.1", "git2", "rustversion", - "thiserror", "time", + "vergen", + "vergen-lib", +] + +[[package]] +name = "vergen-lib" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26ebfba72ba904559f25f41ea1512335b5a46459084258cea0857549d9645187" +dependencies = [ + "anyhow", + "derive_builder 0.13.1", + "getset", + "rustversion", ] [[package]] @@ -5873,9 +5938,9 @@ checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" [[package]] name = "winnow" -version = "0.5.4" +version = "0.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acaaa1190073b2b101e15083c38ee8ec891b5e05cbee516521e94ec008f61e64" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" dependencies = [ "memchr", ] @@ -5904,11 +5969,11 @@ name = "xtask" version = "1.7.0" dependencies = [ "anyhow", + "build-info", "cargo_metadata", "clap", "futures-core", "futures-util", - "git2", "reqwest", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index 11190025a..1d79fd196 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ members = [ "benchmarks", "fuzzers", "tracing-trace", - "xtask", + "xtask", "build-info", ] [workspace.package] diff --git a/Dockerfile b/Dockerfile index dd2cfc134..5b227e6fc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,7 +8,7 @@ WORKDIR / ARG COMMIT_SHA ARG 
COMMIT_DATE
 ARG GIT_TAG
 
-ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
+ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
 ENV RUSTFLAGS="-C target-feature=-crt-static"
 COPY . .
diff --git a/build-info/Cargo.toml b/build-info/Cargo.toml
new file mode 100644
index 000000000..50854a642
--- /dev/null
+++ b/build-info/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "build-info"
+version.workspace = true
+authors.workspace = true
+description.workspace = true
+homepage.workspace = true
+readme.workspace = true
+edition.workspace = true
+license.workspace = true
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+time = { version = "0.3.34", features = ["parsing"] }
+
+[build-dependencies]
+anyhow = "1.0.80"
+vergen-git2 = "1.0.0-beta.2"
diff --git a/build-info/build.rs b/build-info/build.rs
new file mode 100644
index 000000000..b1ec0ab47
--- /dev/null
+++ b/build-info/build.rs
@@ -0,0 +1,22 @@
+fn main() {
+    if let Err(err) = emit_git_variables() {
+        println!("cargo:warning=vergen: {}", err);
+    }
+}
+
+fn emit_git_variables() -> anyhow::Result<()> {
+    // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
+    // in the corresponding GitHub workflow (publish_docker.yml).
+    // This is due to the Dockerfile building the binary outside of the git directory.
+    let mut builder = vergen_git2::Git2Builder::default();
+
+    builder.branch(true);
+    builder.commit_timestamp(true);
+    builder.commit_message(true);
+    builder.describe(true, true, None);
+    builder.sha(false);
+
+    let git2 = builder.build()?;
+
+    vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
+}
diff --git a/build-info/src/lib.rs b/build-info/src/lib.rs
new file mode 100644
index 000000000..cfcefb4a2
--- /dev/null
+++ b/build-info/src/lib.rs
@@ -0,0 +1,203 @@
+use time::format_description::well_known::Iso8601;
+
+#[derive(Debug, Clone)]
+pub struct BuildInfo {
+    pub branch: Option<&'static str>,
+    pub describe: Option<DescribeResult>,
+    pub commit_sha1: Option<&'static str>,
+    pub commit_msg: Option<&'static str>,
+    pub commit_timestamp: Option<time::OffsetDateTime>,
+}
+
+impl BuildInfo {
+    pub fn from_build() -> Self {
+        let branch: Option<&'static str> = option_env!("VERGEN_GIT_BRANCH");
+        let describe = DescribeResult::from_build();
+        let commit_sha1 = option_env!("VERGEN_GIT_SHA");
+        let commit_msg = option_env!("VERGEN_GIT_COMMIT_MESSAGE");
+        let commit_timestamp = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP");
+
+        let commit_timestamp = commit_timestamp.and_then(|commit_timestamp| {
+            time::OffsetDateTime::parse(commit_timestamp, &Iso8601::DEFAULT).ok()
+        });
+
+        Self { branch, describe, commit_sha1, commit_msg, commit_timestamp }
+    }
+}
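An aside on the mechanism at work here: `option_env!` is expanded at compile time, which is why the note in build.rs insists that the Dockerfile must export the `VERGEN_*` variables itself. The Docker build runs outside the git checkout, so vergen cannot compute them from the repository. A minimal standalone sketch, not part of the patch:

    // Resolved while compiling: `None` if the variable was not set for
    // rustc, e.g. because vergen could not inspect a git repository.
    const SHA: Option<&'static str> = option_env!("VERGEN_GIT_SHA");

    fn main() {
        println!("built from commit {}", SHA.unwrap_or("unknown"));
    }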
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum DescribeResult {
+    Prototype { name: &'static str },
+    Release { version: &'static str, major: u64, minor: u64, patch: u64 },
+    Prerelease { version: &'static str, major: u64, minor: u64, patch: u64, rc: u64 },
+    NotATag { describe: &'static str },
+}
+
+impl DescribeResult {
+    pub fn new(describe: &'static str) -> Self {
+        if let Some(name) = prototype_name(describe) {
+            Self::Prototype { name }
+        } else if let Some(release) = release_version(describe) {
+            release
+        } else if let Some(prerelease) = prerelease_version(describe) {
+            prerelease
+        } else {
+            Self::NotATag { describe }
+        }
+    }
+
+    pub fn from_build() -> Option<Self> {
+        let describe: &'static str = option_env!("VERGEN_GIT_DESCRIBE")?;
+        Some(Self::new(describe))
+    }
+
+    pub fn as_tag(&self) -> Option<&'static str> {
+        match self {
+            DescribeResult::Prototype { name } => Some(name),
+            DescribeResult::Release { version, .. } => Some(version),
+            DescribeResult::Prerelease { version, .. } => Some(version),
+            DescribeResult::NotATag { describe: _ } => None,
+        }
+    }
+
+    pub fn as_prototype(&self) -> Option<&'static str> {
+        match self {
+            DescribeResult::Prototype { name } => Some(name),
+            DescribeResult::Release { .. }
+            | DescribeResult::Prerelease { .. }
+            | DescribeResult::NotATag { .. } => None,
+        }
+    }
+}
+
+/// Parses the input as a prototype name.
+///
+/// Returns `Some(prototype_name)` if the following conditions are met on this value:
+///
+/// 1. starts with `prototype-`,
+/// 2. ends with `-<some_number>`,
+/// 3. does not end with `<some_number>-<some_number>`.
+///
+/// Otherwise, returns `None`.
+fn prototype_name(describe: &'static str) -> Option<&'static str> {
+    if !describe.starts_with("prototype-") {
+        return None;
+    }
+
+    let mut rsplit_prototype = describe.rsplit('-');
+    // last component MUST be a number
+    rsplit_prototype.next()?.parse::<u64>().ok()?;
+    // the component before the last SHALL NOT be a number
+    rsplit_prototype.next()?.parse::<u64>().err()?;
+
+    Some(describe)
+}
+
+fn release_version(describe: &'static str) -> Option<DescribeResult> {
+    if !describe.starts_with('v') {
+        return None;
+    }
+
+    // full release versions don't contain a `-`
+    if describe.contains('-') {
+        return None;
+    }
+
+    // full release versions parse as vX.Y.Z, with X, Y, Z numbers.
+    let mut dots = describe[1..].split('.');
+    let major: u64 = dots.next()?.parse().ok()?;
+    let minor: u64 = dots.next()?.parse().ok()?;
+    let patch: u64 = dots.next()?.parse().ok()?;
+
+    if dots.next().is_some() {
+        return None;
+    }
+
+    Some(DescribeResult::Release { version: describe, major, minor, patch })
+}
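To illustrate the right-to-left scanning these helpers rely on, a standalone sketch (example strings are assumptions, not from the repo):

    fn main() {
        // `rsplit('-')` yields components right to left, so the numeric
        // suffix of a prototype tag comes first...
        let mut it = "prototype-my-feature-2".rsplit('-');
        assert_eq!(it.next(), Some("2"));       // MUST parse as a number
        assert_eq!(it.next(), Some("feature")); // MUST NOT parse as a number

        // ...and likewise the `rc.N` component of a prerelease:
        let mut it = "v1.7.0-rc.1".rsplit('-');
        assert_eq!(it.next(), Some("rc.1"));
        assert_eq!(it.next(), Some("v1.7.0"));
    }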
+fn prerelease_version(describe: &'static str) -> Option<DescribeResult> {
+    // prerelease version is in the shape vM.N.P-rc.C
+    let mut hyphen = describe.rsplit('-');
+    let prerelease = hyphen.next()?;
+    if !prerelease.starts_with("rc.") {
+        return None;
+    }
+
+    let rc: u64 = prerelease[3..].parse().ok()?;
+
+    let release = hyphen.next()?;
+
+    let DescribeResult::Release { version: _, major, minor, patch } = release_version(release)?
+    else {
+        return None;
+    };
+
+    Some(DescribeResult::Prerelease { version: describe, major, minor, patch, rc })
+}
+
+#[cfg(test)]
+mod test {
+    use super::DescribeResult;
+
+    fn assert_not_a_tag(describe: &'static str) {
+        assert_eq!(DescribeResult::NotATag { describe }, DescribeResult::new(describe))
+    }
+
+    fn assert_proto(describe: &'static str) {
+        assert_eq!(DescribeResult::Prototype { name: describe }, DescribeResult::new(describe))
+    }
+
+    fn assert_release(describe: &'static str, major: u64, minor: u64, patch: u64) {
+        assert_eq!(
+            DescribeResult::Release { version: describe, major, minor, patch },
+            DescribeResult::new(describe)
+        )
+    }
+
+    fn assert_prerelease(describe: &'static str, major: u64, minor: u64, patch: u64, rc: u64) {
+        assert_eq!(
+            DescribeResult::Prerelease { version: describe, major, minor, patch, rc },
+            DescribeResult::new(describe)
+        )
+    }
+
+    #[test]
+    fn not_a_tag() {
+        assert_not_a_tag("whatever-fuzzy");
+        assert_not_a_tag("whatever-fuzzy-5-ggg-dirty");
+        assert_not_a_tag("whatever-fuzzy-120-ggg-dirty");
+
+        // technically a tag, but not a proto nor a version, so not parsed as a tag
+        assert_not_a_tag("whatever");
+
+        // dirty version
+        assert_not_a_tag("v1.7.0-1-ggga-dirty");
+        assert_not_a_tag("v1.7.0-rc.1-1-ggga-dirty");
+
+        // after version
+        assert_not_a_tag("v1.7.0-1-ggga");
+        assert_not_a_tag("v1.7.0-rc.1-1-ggga");
+
+        // after proto
+        assert_not_a_tag("prototype-tag-0-1-ggga");
+        assert_not_a_tag("prototype-tag-0-1-ggga-dirty");
+    }
+
+    #[test]
+    fn prototype() {
+        assert_proto("prototype-tag-0");
+        assert_proto("prototype-tag-10");
+        assert_proto("prototype-long-name-tag-10");
+    }
+
+    #[test]
+    fn release() {
+        assert_release("v1.7.2", 1, 7, 2);
+    }
+
+    #[test]
+    fn prerelease() {
+        assert_prerelease("v1.7.2-rc.3", 1, 7, 2, 3);
+    }
+}
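To recap the grammar implemented above, a sketch of how typical `git describe` outputs are classified (assuming `build_info` as a dependency; the inputs mirror the tests):

    use build_info::DescribeResult;

    fn main() {
        // Exact tags:
        assert!(matches!(DescribeResult::new("v1.7.2"), DescribeResult::Release { .. }));
        assert!(matches!(DescribeResult::new("v1.7.2-rc.3"), DescribeResult::Prerelease { .. }));
        assert!(matches!(DescribeResult::new("prototype-tag-10"), DescribeResult::Prototype { .. }));
        // Extra commits past a tag, or a dirty tree, do not count as a tag:
        assert!(matches!(DescribeResult::new("v1.7.0-1-ggga-dirty"), DescribeResult::NotATag { .. }));
    }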
diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml
index fc4f5aa8b..b65c466ca 100644
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -107,6 +107,7 @@ tracing = "0.1.40"
 tracing-subscriber = { version = "0.3.18", features = ["json"] }
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
 tracing-actix-web = "0.7.9"
+build-info = { version = "1.7.0", path = "../build-info" }
 
 [dev-dependencies]
 actix-rt = "2.9.0"
@@ -131,7 +132,6 @@ reqwest = { version = "0.11.23", features = [
 sha-1 = { version = "0.10.1", optional = true }
 static-files = { version = "0.2.3", optional = true }
 tempfile = { version = "3.9.0", optional = true }
-vergen = { version = "7.5.1", default-features = false, features = ["git"] }
 zip = { version = "0.6.6", optional = true }
 
 [features]
diff --git a/meilisearch/build.rs b/meilisearch/build.rs
index c839b6e33..dc24b0449 100644
--- a/meilisearch/build.rs
+++ b/meilisearch/build.rs
@@ -1,17 +1,4 @@
-use vergen::{vergen, Config, SemverKind};
-
 fn main() {
-    // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
-    // in the corresponding GitHub workflow (publish_docker.yml).
-    // This is due to the Dockerfile building the binary outside of the git directory.
-    let mut config = Config::default();
-    // allow using non-annotated tags
-    *config.git_mut().semver_kind_mut() = SemverKind::Lightweight;
-
-    if let Err(e) = vergen(config) {
-        println!("cargo:warning=vergen: {}", e);
-    }
-
     #[cfg(feature = "mini-dashboard")]
     mini_dashboard::setup_mini_dashboard().expect("Could not load the mini-dashboard assets");
 }
diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs
index 55ddb4747..7dfc52900 100644
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -473,7 +473,9 @@ impl Segment {
             create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
         {
             // Replace the version number with the prototype name if any.
-            let version = if let Some(prototype) = crate::prototype_name() {
+            let version = if let Some(prototype) = build_info::DescribeResult::from_build()
+                .and_then(|describe| describe.as_prototype())
+            {
                 prototype
             } else {
                 env!("CARGO_PKG_VERSION")
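The fallback chain used in the analytics hunk above, as a standalone sketch (assuming the build-info crate is in scope; the helper name is illustrative):

    // Report the prototype name when this binary was built from a
    // `prototype-*` tag, and the plain crate version otherwise.
    fn reported_version() -> &'static str {
        build_info::DescribeResult::from_build()
            .and_then(|describe| describe.as_prototype())
            .unwrap_or(env!("CARGO_PKG_VERSION"))
    }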
diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs
index 9d9274b9d..820f1ae42 100644
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -536,30 +536,3 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
 pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
     config.service(web::resource("/").route(web::get().to(routes::running)));
 }
-
-/// Parses the output of
-/// [`VERGEN_GIT_SEMVER_LIGHTWEIGHT`](https://docs.rs/vergen/latest/vergen/struct.Git.html#instructions)
-/// as a prototype name.
-///
-/// Returns `Some(prototype_name)` if the following conditions are met on this value:
-///
-/// 1. starts with `prototype-`,
-/// 2. ends with `-<some_number>`,
-/// 3. does not end with `<some_number>-<some_number>`.
-///
-/// Otherwise, returns `None`.
-pub fn prototype_name() -> Option<&'static str> {
-    let prototype: &'static str = option_env!("VERGEN_GIT_SEMVER_LIGHTWEIGHT")?;
-
-    if !prototype.starts_with("prototype-") {
-        return None;
-    }
-
-    let mut rsplit_prototype = prototype.rsplit('-');
-    // last component MUST be a number
-    rsplit_prototype.next()?.parse::<u64>().ok()?;
-    // the component before the last SHALL NOT be a number
-    rsplit_prototype.next()?.parse::<u64>().err()?;
-
-    Some(prototype)
-}
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index f1f93dd99..79ca7ec80 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -12,8 +12,8 @@ use is_terminal::IsTerminal;
 use meilisearch::analytics::Analytics;
 use meilisearch::option::LogMode;
 use meilisearch::{
-    analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType,
-    LogStderrHandle, LogStderrType, Opt, SubscriberForSecondLayer,
+    analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
+    LogStderrType, Opt, SubscriberForSecondLayer,
 };
 use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
 use mimalloc::MiMalloc;
@@ -163,8 +163,8 @@ pub fn print_launch_resume(
     analytics: Arc<dyn Analytics>,
     config_read_from: Option<PathBuf>,
 ) {
-    let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
-    let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
+    let build_info = build_info::BuildInfo::from_build();
+
     let protocol =
         if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" };
     let ascii_name = r#"
@@ -189,10 +189,18 @@ pub fn print_launch_resume(
     eprintln!("Database path:\t\t{:?}", opt.db_path);
     eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
     eprintln!("Environment:\t\t{:?}", opt.env);
-    eprintln!("Commit SHA:\t\t{:?}", commit_sha.to_string());
-    eprintln!("Commit date:\t\t{:?}", commit_date.to_string());
+    eprintln!("Commit SHA:\t\t{:?}", build_info.commit_sha1.unwrap_or("unknown"));
+    eprintln!(
+        "Commit date:\t\t{:?}",
+        build_info
+            .commit_timestamp
+            .and_then(|commit_timestamp| commit_timestamp
+                .format(&time::format_description::well_known::Iso8601::DEFAULT)
+                .ok())
+            .unwrap_or("unknown".into())
+    );
     eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string());
 
-    if let Some(prototype) = prototype_name() {
+    if let Some(prototype) = build_info.describe.and_then(|describe| describe.as_prototype()) {
         eprintln!("Prototype:\t\t{:?}", prototype);
     }
 
diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs
index 249103e12..1c1465582 100644
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@@ -359,12 +359,18 @@ async fn get_version(
 ) -> HttpResponse {
     analytics.publish("Version Seen".to_string(), json!(null), Some(&req));
 
-    let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
-    let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
+    let build_info = build_info::BuildInfo::from_build();
 
     HttpResponse::Ok().json(VersionResponse {
-        commit_sha: commit_sha.to_string(),
-        commit_date: commit_date.to_string(),
+        commit_sha: build_info.commit_sha1.unwrap_or("unknown").to_string(),
+        commit_date: build_info
+            .commit_timestamp
+            .and_then(|commit_timestamp| {
+                commit_timestamp
+                    .format(&time::format_description::well_known::Iso8601::DEFAULT)
+                    .ok()
+            })
+            .unwrap_or("unknown".into()),
         pkg_version: env!("CARGO_PKG_VERSION").to_string(),
     })
 }
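Both call sites above render the timestamp the same way; a sketch of the shared shape (the helper name is illustrative, assuming the `time` crate):

    use time::format_description::well_known::Iso8601;
    use time::OffsetDateTime;

    // ISO-8601 text for a commit date, or "unknown" when the build carried
    // no timestamp (e.g. built outside a git checkout).
    fn fmt_commit_date(ts: Option<OffsetDateTime>) -> String {
        ts.and_then(|ts| ts.format(&Iso8601::DEFAULT).ok())
            .unwrap_or_else(|| "unknown".into())
    }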
diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml
index a59a79e53..0df8161ce 100644
--- a/xtask/Cargo.toml
+++ b/xtask/Cargo.toml
@@ -12,11 +12,11 @@ license.workspace = true
 
 [dependencies]
 anyhow = "1.0.79"
+build-info = { version = "1.7.0", path = "../build-info" }
 cargo_metadata = "0.18.1"
 clap = { version = "4.4.14", features = ["derive"] }
 futures-core = "0.3.30"
 futures-util = "0.3.30"
-git2 = { version = "0.16", default_features = false }
 reqwest = { version = "0.11.23", features = [
     "stream",
     "json",
@@ -26,7 +26,11 @@ serde = { version = "1.0.195", features = ["derive"] }
 serde_json = "1.0.111"
 sha2 = "0.10.8"
 sysinfo = "0.30.5"
-time = { version = "0.3.32", features = ["serde", "serde-human-readable"] }
+time = { version = "0.3.32", features = [
+    "serde",
+    "serde-human-readable",
+    "macros",
+] }
 tokio = { version = "1.35.1", features = [
     "rt",
     "net",
@@ -38,3 +42,6 @@ tracing = "0.1.40"
 tracing-subscriber = "0.3.18"
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
 uuid = { version = "1.7.0", features = ["v7", "serde"] }
+
+[build-dependencies]
+anyhow = "1.0.79"
diff --git a/xtask/src/bench/env_info.rs b/xtask/src/bench/env_info.rs
index 5cbeb4274..08dacf915 100644
--- a/xtask/src/bench/env_info.rs
+++ b/xtask/src/bench/env_info.rs
@@ -1,58 +1,4 @@
 use serde::{Deserialize, Serialize};
-use time::OffsetDateTime;
-
-#[derive(Debug, Clone, Deserialize, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct Source {
-    pub repo_url: Option<String>,
-    pub branch_or_tag: String,
-    pub commit_id: String,
-    pub commit_msg: String,
-    pub author_name: String,
-    pub author_email: String,
-    pub committer_name: String,
-    pub committer_email: String,
-}
-
-impl Source {
-    pub fn from_repo(
-        path: impl AsRef<std::path::Path>,
-    ) -> Result<(Self, OffsetDateTime), git2::Error> {
-        use git2::Repository;
-
-        let repo = Repository::open(path)?;
-        let remote = repo.remotes()?;
-        let remote = remote.get(0).expect("No remote associated to the repo");
-        let remote = repo.find_remote(remote)?;
-
-        let head = repo.head()?;
-
-        let commit = head.peel_to_commit()?;
-
-        let time = OffsetDateTime::from_unix_timestamp(commit.time().seconds()).unwrap();
-
-        let author = commit.author();
-        let committer = commit.committer();
-
-        Ok((
-            Self {
-                repo_url: remote.url().map(|s| s.to_string()),
-                branch_or_tag: head.name().unwrap().to_string(),
-                commit_id: commit.id().to_string(),
-                commit_msg: String::from_utf8_lossy(commit.message_bytes())
-                    .to_string()
-                    .lines()
-                    .next()
-                    .map_or(String::new(), |s| s.to_string()),
-                author_name: author.name().unwrap().to_string(),
-                author_email: author.email().unwrap().to_string(),
-                committer_name: committer.name().unwrap().to_string(),
-                committer_email: committer.email().unwrap().to_string(),
-            },
-            time,
-        ))
-    }
-}
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
 #[serde(rename_all = "camelCase")]
diff --git a/xtask/src/bench/mod.rs b/xtask/src/bench/mod.rs
index ea17b6f69..cfc7c124f 100644
--- a/xtask/src/bench/mod.rs
+++ b/xtask/src/bench/mod.rs
@@ -292,8 +292,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
         args.log_filter.parse().context("invalid --log-filter")?;
 
     let env = env_info::Environment::generate_from_current_config();
-    let (source, commit_date) =
-        env_info::Source::from_repo(".").context("could not get repository information")?;
+    let build_info = build_info::BuildInfo::from_build();
 
     let subscriber = tracing_subscriber::registry().with(
         tracing_subscriber::fmt::layer()
@@ -344,17 +343,18 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
         );
     }
 
-    let commit_message =
source.commit_msg.split('\n').next().unwrap(); + let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap(); let max_workloads = args.workload_file.len(); let reason: Option<&str> = args.reason.as_deref(); let response = dashboard_client .put("invocation") .json(&json!({ "commit": { - "sha1": source.commit_id, + "sha1": build_info.commit_sha1, "message": commit_message, - "commit_date": commit_date, - "branch": source.branch_or_tag + "commit_date": build_info.commit_timestamp, + "branch": build_info.branch, + "tag": build_info.describe.and_then(|describe| describe.as_tag()), }, "machine_hostname": env.hostname, "max_workloads": max_workloads, From 55f60a363808d39dd6ee16f9b589df48d0d653f7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 4 Mar 2024 14:29:44 +0100 Subject: [PATCH 06/12] Update .gitignore - Ignore `/bench` directory for git purposes - Ignore benchmark DB --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 5f660c735..e00f45c1e 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,8 @@ /data.ms /snapshots /dumps +/bench +/_xtask_benchmark.ms # Snapshots ## ... large From eee46b7537f4a1fad754b9b9106351e765171b0c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 4 Mar 2024 14:31:14 +0100 Subject: [PATCH 07/12] Add first workloads --- workloads/hackernews.json | 164 ++++++++++++++++++++++++++++++++ workloads/movies-nothreads.json | 44 +++++++++ workloads/movies.json | 42 ++++++++ 3 files changed, 250 insertions(+) create mode 100644 workloads/hackernews.json create mode 100644 workloads/movies-nothreads.json create mode 100644 workloads/movies.json diff --git a/workloads/hackernews.json b/workloads/hackernews.json new file mode 100644 index 000000000..0a99b69ff --- /dev/null +++ b/workloads/hackernews.json @@ -0,0 +1,164 @@ +{ + "name": "hackernews.ndjson_1M", + "run_count": 3, + "extra_cli_args": [], + "assets": { + "hackernews-100_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson", + "sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213" + }, + "hackernews-200_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson", + "sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685" + }, + "hackernews-300_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson", + "sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2" + }, + "hackernews-400_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson", + "sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7" + }, + "hackernews-500_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson", + "sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083" + }, + "hackernews-600_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson", + "sha256": 
"b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe" + }, + "hackernews-700_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson", + "sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b" + }, + "hackernews-800_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson", + "sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546" + }, + "hackernews-900_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson", + "sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9" + }, + "hackernews-1_000_000.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson", + "sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe" + } + }, + "commands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time" + ], + "searchableAttributes": [ + "title" + ], + "filterableAttributes": [ + "by" + ], + "sortableAttributes": [ + "score", + "time" + ] + } + }, + "synchronous": "DontWait" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-100_000.ndjson" + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-200_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-300_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-400_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-500_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-600_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-700_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-800_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-900_000.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-1_000_000.ndjson" + }, + "synchronous": "WaitForTask" + } + ] +} \ No newline at end of file diff --git a/workloads/movies-nothreads.json b/workloads/movies-nothreads.json new file mode 100644 index 000000000..175daacf9 --- /dev/null +++ b/workloads/movies-nothreads.json @@ -0,0 +1,44 @@ +{ + "name": "movies.json,no-threads", + "run_count": 2, + "extra_cli_args": [ + "--max-indexing-threads=1" + ], + "assets": { + "movies.json": { + "local_location": null, + "remote_location": 
"https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json", + "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1" + } + }, + "commands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres", + "release_date" + ], + "sortableAttributes": [ + "release_date" + ] + } + }, + "synchronous": "DontWait" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "movies.json" + }, + "synchronous": "WaitForTask" + } + ] +} \ No newline at end of file diff --git a/workloads/movies.json b/workloads/movies.json new file mode 100644 index 000000000..445ff3aca --- /dev/null +++ b/workloads/movies.json @@ -0,0 +1,42 @@ +{ + "name": "movies.json", + "run_count": 10, + "extra_cli_args": [], + "assets": { + "movies.json": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json", + "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1" + } + }, + "commands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "searchableAttributes": [ + "title", + "overview" + ], + "filterableAttributes": [ + "genres", + "release_date" + ], + "sortableAttributes": [ + "release_date" + ] + } + }, + "synchronous": "DontWait" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "movies.json" + }, + "synchronous": "WaitForTask" + } + ] +} From adcd848809647c7167f23b0612c8e69452ed0beb Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 4 Mar 2024 23:03:26 +0100 Subject: [PATCH 08/12] CI: Add bench workflows --- .github/workflows/bench-manual.yml | 30 +++++++++++++++ .github/workflows/bench-pr.yml | 46 +++++++++++++++++++++++ .github/workflows/bench-push-indexing.yml | 25 ++++++++++++ 3 files changed, 101 insertions(+) create mode 100644 .github/workflows/bench-manual.yml create mode 100644 .github/workflows/bench-pr.yml create mode 100644 .github/workflows/bench-push-indexing.yml diff --git a/.github/workflows/bench-manual.yml b/.github/workflows/bench-manual.yml new file mode 100644 index 000000000..6d8c3a006 --- /dev/null +++ b/.github/workflows/bench-manual.yml @@ -0,0 +1,30 @@ +name: Bench (manual) + +on: + workflow_dispatch: + inputs: + workload: + description: 'The path to the workloads to execute (workloads/...)' + required: true + default: 'workloads/movies.json' + +env: + WORKLOAD_NAME: ${{ github.event.inputs.workload }} + +jobs: + benchmarks: + name: Run and upload benchmarks + runs-on: benchmarks + timeout-minutes: 180 # 3h + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + + - name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }} + run: | + cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Manual [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- ${WORKLOAD_NAME} + diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml new file mode 100644 index 000000000..6f4956542 --- /dev/null +++ b/.github/workflows/bench-pr.yml @@ -0,0 +1,46 @@ +name: Bench (PR) +on: + issue_comment: + types: [created] + +permissions: + issues: write + +env: + GH_TOKEN: 
${{ secrets.MEILI_BOT_GH_PAT }}
+
+jobs:
+  run-benchmarks-on-comment:
+    if: startsWith(github.event.comment.body, '/bench')
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 180 # 3h
+    steps:
+      - name: Check for Command
+        id: command
+        uses: xt0rted/slash-command-action@v2
+        with:
+          command: bench
+          reaction-type: "rocket"
+          repo-token: ${{ env.GH_TOKEN }}
+
+      - uses: xt0rted/pull-request-comment-branch@v2
+        id: comment-branch
+        with:
+          repo_token: ${{ env.GH_TOKEN }}
+
+      - uses: actions/checkout@v3
+        if: success()
+        with:
+          fetch-depth: 0 # fetch full history to be able to get main commit sha
+          ref: ${{ steps.comment-branch.outputs.head_ref }}
+
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Run benchmarks on PR ${{ github.event.issue.id }}
+        run: |
+          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.url }}) on [#${{github.event.issue.id}}](${{ github.event.issue.url }})" -- ${{ steps.command.outputs.command-arguments }}
\ No newline at end of file
diff --git a/.github/workflows/bench-push-indexing.yml b/.github/workflows/bench-push-indexing.yml
new file mode 100644
index 000000000..fd0f19a5a
--- /dev/null
+++ b/.github/workflows/bench-push-indexing.yml
@@ -0,0 +1,25 @@
+name: Indexing bench (push)
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  benchmarks:
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 180 # 3h
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      # Run benchmarks
+      - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
+        run: |
+          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Push on \`main\` [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- workloads/*.json
+
From 7ee20b0895f902275c727784b4b098e238e16abf Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Tue, 5 Mar 2024 14:42:06 +0100
Subject: [PATCH 09/12] Refactor xtask bench

---
 xtask/src/bench/assets.rs        |  250 ++++++++
 xtask/src/bench/client.rs        |   80 +++
 xtask/src/bench/command.rs       |  194 ++++++
 xtask/src/bench/dashboard.rs     |  167 +++++
 xtask/src/bench/meili_process.rs |  112 ++++
 xtask/src/bench/mod.rs           | 1014 +-----------------------------
 xtask/src/bench/workload.rs      |  262 ++++++++
 7 files changed, 1094 insertions(+), 985 deletions(-)
 create mode 100644 xtask/src/bench/assets.rs
 create mode 100644 xtask/src/bench/client.rs
 create mode 100644 xtask/src/bench/command.rs
 create mode 100644 xtask/src/bench/dashboard.rs
 create mode 100644 xtask/src/bench/meili_process.rs
 create mode 100644 xtask/src/bench/workload.rs

diff --git a/xtask/src/bench/assets.rs b/xtask/src/bench/assets.rs
new file mode 100644
index 000000000..241928dbf
--- /dev/null
+++ b/xtask/src/bench/assets.rs
@@ -0,0 +1,250 @@
+use std::collections::BTreeMap;
+use std::io::{Read as _, Seek as _, Write as _};
+
+use anyhow::{bail, Context};
+use futures_util::TryStreamExt as _;
+use serde::Deserialize;
+use sha2::Digest;
+
+use super::client::Client;
+
+#[derive(Deserialize, Clone)]
+pub struct Asset {
+    pub local_location: Option<String>,
+    pub remote_location: Option<String>,
+    #[serde(default)]
+    pub format: AssetFormat,
+    pub sha256: Option<String>,
+}
+
+#[derive(Deserialize, Default, Copy, Clone)]
+pub
enum AssetFormat {
+    #[default]
+    Auto,
+    Json,
+    NdJson,
+    Raw,
+}
+
+impl AssetFormat {
+    pub fn to_content_type(self, filename: &str) -> &'static str {
+        match self {
+            AssetFormat::Auto => Self::auto_detect(filename).to_content_type(filename),
+            AssetFormat::Json => "application/json",
+            AssetFormat::NdJson => "application/x-ndjson",
+            AssetFormat::Raw => "application/octet-stream",
+        }
+    }
+
+    fn auto_detect(filename: &str) -> Self {
+        let path = std::path::Path::new(filename);
+        match path.extension().and_then(|extension| extension.to_str()) {
+            Some(extension) if extension.eq_ignore_ascii_case("json") => Self::Json,
+            Some(extension) if extension.eq_ignore_ascii_case("ndjson") => Self::NdJson,
+            extension => {
+                tracing::warn!(asset = filename, ?extension, "asset has format `Auto`, but extension was not recognized. Specify `Raw` format to suppress this warning.");
+                AssetFormat::Raw
+            }
+        }
+    }
+}
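A quick sketch of what this format resolution yields (assuming the enum above is in scope; the file names are made up):

    fn main() {
        // The extension drives the content type; unrecognized extensions
        // log a warning and fall back to `Raw`.
        assert_eq!(AssetFormat::Auto.to_content_type("movies.json"), "application/json");
        assert_eq!(AssetFormat::Auto.to_content_type("docs.NDJSON"), "application/x-ndjson");
        assert_eq!(AssetFormat::Auto.to_content_type("dataset.csv"), "application/octet-stream");
    }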
+
+pub fn fetch_asset(
+    name: &str,
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<(std::fs::File, AssetFormat)> {
+    let asset =
+        assets.get(name).with_context(|| format!("could not find asset with name '{name}'"))?;
+    let filename = if let Some(local_filename) = &asset.local_location {
+        local_filename.clone()
+    } else {
+        format!("{asset_folder}/{name}")
+    };
+
+    Ok((
+        std::fs::File::open(&filename)
+            .with_context(|| format!("could not open asset '{name}' at '{filename}'"))?,
+        asset.format,
+    ))
+}
+
+#[tracing::instrument(skip(client, assets), fields(asset_count = assets.len()))]
+pub async fn fetch_assets(
+    client: &Client,
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<()> {
+    let mut download_tasks = tokio::task::JoinSet::new();
+    for (name, asset) in assets {
+        // trying local
+        if let Some(local) = &asset.local_location {
+            match std::fs::File::open(local) {
+                Ok(file) => {
+                    if check_sha256(name, asset, file)? {
+                        continue;
+                    } else {
+                        tracing::warn!(asset = name, file = local, "found local resource for asset but hash differed, skipping to asset store");
+                    }
+                }
+                Err(error) => match error.kind() {
+                    std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
+                    }
+                    _ => tracing::warn!(
+                        error = &error as &dyn std::error::Error,
+                        "error checking local resource, skipping to asset store"
+                    ),
+                },
+            }
+        }
+
+        // checking asset store
+        let store_filename = format!("{}/{}", asset_folder, name);
+
+        match std::fs::File::open(&store_filename) {
+            Ok(file) => {
+                if check_sha256(name, asset, file)? {
+                    continue;
+                } else {
+                    tracing::warn!(asset = name, file = store_filename, "found resource for asset in asset store, but hash differed, skipping to remote method");
+                }
+            }
+            Err(error) => match error.kind() {
+                std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
+                }
+                _ => tracing::warn!(
+                    error = &error as &dyn std::error::Error,
+                    "error checking resource in store, skipping to remote method"
+                ),
+            },
+        }
+
+        // downloading remote
+        match &asset.remote_location {
+            Some(location) => {
+                std::fs::create_dir_all(asset_folder).with_context(|| format!("could not create asset folder at {asset_folder}"))?;
+                download_tasks.spawn({
+                    let client = client.clone();
+                    let name = name.to_string();
+                    let location = location.to_string();
+                    let store_filename = store_filename.clone();
+                    let asset = asset.clone();
+                    download_asset(client, name, asset, location, store_filename)});
+            },
+            None => bail!("asset {name} has no remote location, but was not found locally or in the asset store"),
+        }
+    }
+
+    while let Some(res) = download_tasks.join_next().await {
+        res.context("download task panicked")?.context("download task failed")?;
+    }
+
+    Ok(())
+}
+
+fn check_sha256(name: &str, asset: &Asset, mut file: std::fs::File) -> anyhow::Result<bool> {
+    let mut bytes = Vec::new();
+    file.read_to_end(&mut bytes).with_context(|| format!("hashing file for asset {name}"))?;
+    let mut file_hash = sha2::Sha256::new();
+    file_hash.update(&bytes);
+    let file_hash = file_hash.finalize();
+    let file_hash = format!("{:x}", file_hash);
+    tracing::debug!(hash = file_hash, "hashed local file");
+
+    Ok(match &asset.sha256 {
+        Some(hash) => {
+            tracing::debug!(hash, "hash from workload");
+            if hash.to_ascii_lowercase() == file_hash {
+                true
+            } else {
+                tracing::warn!(
+                    file_hash,
+                    asset_hash = hash.to_ascii_lowercase(),
+                    "hashes don't match"
+                );
+                false
+            }
+        }
+        None => {
+            tracing::warn!(sha256 = file_hash, "Skipping hash for asset {name} that doesn't have one. Please add it to the workload file");
+            true
+        }
+    })
+}
+
+#[tracing::instrument(skip(client, asset, name), fields(asset = name))]
+async fn download_asset(
+    client: Client,
+    name: String,
+    asset: Asset,
+    src: String,
+    dest_filename: String,
+) -> anyhow::Result<()> {
+    let context = || format!("failure downloading asset {name} from {src}");
+
+    let response = client.get(&src).send().await.with_context(context)?;
+
+    let file = std::fs::File::options()
+        .create(true)
+        .truncate(true)
+        .write(true)
+        .read(true)
+        .open(&dest_filename)
+        .with_context(|| format!("creating destination file {dest_filename}"))
+        .with_context(context)?;
+
+    let mut dest = std::io::BufWriter::new(
+        file.try_clone().context("cloning I/O handle").with_context(context)?,
+    );
+
+    let total_len: Option<u64> = response
+        .headers()
+        .get(reqwest::header::CONTENT_LENGTH)
+        .and_then(|value| value.to_str().ok())
+        .and_then(|value| value.parse().ok());
+
+    let progress = tokio::spawn({
+        let name = name.clone();
+        async move {
+            loop {
+                match file.metadata().context("could not get file metadata") {
+                    Ok(metadata) => {
+                        let len = metadata.len();
+                        tracing::info!(
+                            asset = name,
+                            downloaded_bytes = len,
+                            total_bytes = total_len,
+                            "asset download in progress"
+                        );
+                    }
+                    Err(error) => {
+                        tracing::warn!(%error, "could not get file metadata");
+                    }
+                }
+                tokio::time::sleep(std::time::Duration::from_secs(60)).await;
+            }
+        }
+    });
+
+    let writing_context = || format!("while writing to destination file at {dest_filename}");
+
+    let mut response = response.bytes_stream();
+
+    while let Some(bytes) =
+        response.try_next().await.context("while downloading file").with_context(context)?
+    {
+        dest.write_all(&bytes).with_context(writing_context).with_context(context)?;
+    }
+
+    progress.abort();
+
+    let mut file = dest.into_inner().with_context(writing_context).with_context(context)?;
+
+    file.rewind().context("while rewinding asset file")?;
+
+    if !check_sha256(&name, &asset, file)? {
+        bail!("asset '{name}': sha256 mismatch for file {dest_filename} downloaded from {src}")
+    }
+
+    Ok(())
+}
diff --git a/xtask/src/bench/client.rs b/xtask/src/bench/client.rs
new file mode 100644
index 000000000..3e46615cc
--- /dev/null
+++ b/xtask/src/bench/client.rs
@@ -0,0 +1,80 @@
+use anyhow::Context;
+use serde::Deserialize;
+
+#[derive(Debug, Clone)]
+pub struct Client {
+    base_url: Option<String>,
+    client: reqwest::Client,
+}
+
+impl Client {
+    pub fn new(
+        base_url: Option<String>,
+        api_key: Option<&str>,
+        timeout: Option<std::time::Duration>,
+    ) -> anyhow::Result<Self> {
+        let mut headers = reqwest::header::HeaderMap::new();
+        if let Some(api_key) = api_key {
+            headers.append(
+                reqwest::header::AUTHORIZATION,
+                reqwest::header::HeaderValue::from_str(&format!("Bearer {api_key}"))
+                    .context("Invalid authorization header")?,
+            );
+        }
+
+        let client = reqwest::ClientBuilder::new().default_headers(headers);
+        let client = if let Some(timeout) = timeout { client.timeout(timeout) } else { client };
+        let client = client.build()?;
+        Ok(Self { base_url, client })
+    }
+
+    pub fn request(&self, method: reqwest::Method, route: &str) -> reqwest::RequestBuilder {
+        if let Some(base_url) = &self.base_url {
+            if route.is_empty() {
+                self.client.request(method, base_url)
+            } else {
+                self.client.request(method, format!("{}/{}", base_url, route))
+            }
+        } else {
+            self.client.request(method, route)
+        }
+    }
+
+    pub fn get(&self, route: &str) -> reqwest::RequestBuilder {
+        self.request(reqwest::Method::GET, route)
+    }
+
+    pub fn put(&self, route: &str) -> reqwest::RequestBuilder {
+        self.request(reqwest::Method::PUT, route)
+    }
+
+    pub fn post(&self, route: &str) -> reqwest::RequestBuilder {
+        self.request(reqwest::Method::POST, route)
+    }
+
+    pub fn delete(&self, route: &str) -> reqwest::RequestBuilder {
+        self.request(reqwest::Method::DELETE, route)
+    }
+}
+
+#[derive(Debug, Clone, Copy, Deserialize)]
+#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
+pub enum Method {
+    Get,
+    Post,
+    Patch,
+    Delete,
+    Put,
+}
+
+impl From<Method> for reqwest::Method {
+    fn from(value: Method) -> Self {
+        match value {
+            Method::Get => Self::GET,
+            Method::Post => Self::POST,
+            Method::Patch => Self::PATCH,
+            Method::Delete => Self::DELETE,
+            Method::Put => Self::PUT,
+        }
+    }
+}
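A usage sketch for this wrapper (the URL, key, and route are made up):

    // In base-url mode, routes are joined onto the configured address and
    // the bearer token rides along in the default headers.
    async fn sketch() -> anyhow::Result<()> {
        let client = Client::new(
            Some("http://localhost:9001/api/v1".into()),
            Some("dashboard-api-key"),
            Some(std::time::Duration::from_secs(60)),
        )?;
        // Sends GET http://localhost:9001/api/v1/machine
        let response = client.get("machine").send().await?;
        anyhow::ensure!(response.status().is_success(), "unexpected status");
        Ok(())
    }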
diff --git a/xtask/src/bench/command.rs b/xtask/src/bench/command.rs
new file mode 100644
index 000000000..0f0b5d213
--- /dev/null
+++ b/xtask/src/bench/command.rs
@@ -0,0 +1,194 @@
+use std::collections::BTreeMap;
+use std::fmt::Display;
+use std::io::Read as _;
+
+use anyhow::{bail, Context as _};
+use serde::Deserialize;
+
+use super::assets::{fetch_asset, Asset};
+use super::client::{Client, Method};
+
+#[derive(Clone, Deserialize)]
+pub struct Command {
+    pub route: String,
+    pub method: Method,
+    #[serde(default)]
+    pub body: Body,
+    #[serde(default)]
+    pub synchronous: SyncMode,
+}
+
+#[derive(Default, Clone, Deserialize)]
+#[serde(untagged)]
+pub enum Body {
+    Inline {
+        inline: serde_json::Value,
+    },
+    Asset {
+        asset: String,
+    },
+    #[default]
+    Empty,
+}
+
+impl Body {
+    pub fn get(
+        self,
+        assets: &BTreeMap<String, Asset>,
+        asset_folder: &str,
+    ) -> anyhow::Result<Option<(Vec<u8>, &'static str)>> {
+        Ok(match self {
+            Body::Inline { inline: body } => Some((
+                serde_json::to_vec(&body)
+                    .context("serializing to bytes")
+                    .context("while getting inline body")?,
+                "application/json",
+            )),
+            Body::Asset { asset: name } => Some({
+                let context = || format!("while getting body from asset '{name}'");
+                let (mut file, format) =
+                    fetch_asset(&name, assets, asset_folder).with_context(context)?;
+                let mut buf = Vec::new();
+                file.read_to_end(&mut buf).with_context(context)?;
+                (buf, format.to_content_type(&name))
+            }),
+            Body::Empty => None,
+        })
+    }
+}
+
+impl Display for Command {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{:?} {} ({:?})", self.method, self.route, self.synchronous)
+    }
+}
+
+#[derive(Default, Debug, Clone, Copy, Deserialize)]
+pub enum SyncMode {
+    DontWait,
+    #[default]
+    WaitForResponse,
+    WaitForTask,
+}
+
+pub async fn run_batch(
+    client: &Client,
+    batch: &[Command],
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<()> {
+    let [.., last] = batch else { return Ok(()) };
+    let sync = last.synchronous;
+
+    let mut tasks = tokio::task::JoinSet::new();
+
+    for command in batch {
+        // FIXME: you probably don't want to copy assets every time here
+        tasks.spawn({
+            let client = client.clone();
+            let command = command.clone();
+            let assets = assets.clone();
+            let asset_folder = asset_folder.to_owned();
+
+            async move { run(client, command, &assets, &asset_folder).await }
+        });
+    }
+
+    while let Some(result) = tasks.join_next().await {
+        result
+            .context("panicked while executing command")?
+            .context("error while executing command")?;
+    }
+
+    match sync {
+        SyncMode::DontWait => {}
+        SyncMode::WaitForResponse => {}
+        SyncMode::WaitForTask => wait_for_tasks(client).await?,
+    }
+
+    Ok(())
+}
+
+async fn wait_for_tasks(client: &Client) -> anyhow::Result<()> {
+    loop {
+        let response = client
+            .get("tasks?statuses=enqueued,processing")
+            .send()
+            .await
+            .context("could not wait for tasks")?;
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response to JSON")
+            .context("could not wait for tasks")?;
+        match response.get("total") {
+            Some(serde_json::Value::Number(number)) => {
+                let number = number.as_u64().with_context(|| {
+                    format!("waiting for tasks: could not parse 'total' as integer, got {}", number)
+                })?;
+                if number == 0 {
+                    break;
+                } else {
+                    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
+                    continue;
+                }
+            }
+            Some(thing_else) => {
+                bail!(format!(
+                    "waiting for tasks: could not parse 'total' as a number, got '{thing_else}'"
+                ))
+            }
+            None => {
+                bail!(format!(
+                    "waiting for tasks: expected response to contain 'total', got '{response}'"
+                ))
+            }
+        }
+    }
+    Ok(())
+}
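Worth spelling out: every command in a batch is spawned concurrently, and only the last command's `synchronous` mode decides how the whole batch is awaited. A sketch, assuming the module's types (`document_post` is a hypothetical helper):

    // Hypothetical constructor for a documents-route command.
    fn document_post(asset: &str, synchronous: SyncMode) -> Command {
        Command {
            route: "indexes/movies/documents".into(),
            method: Method::Post,
            body: Body::Asset { asset: asset.into() },
            synchronous,
        }
    }

    // The three POSTs run concurrently; since the *last* one is
    // `WaitForTask`, run_batch polls `tasks?statuses=enqueued,processing`
    // until the queue drains before returning.
    async fn sketch(
        client: &Client,
        assets: &std::collections::BTreeMap<String, Asset>,
    ) -> anyhow::Result<()> {
        let batch = [
            document_post("hackernews-100_000.ndjson", SyncMode::DontWait),
            document_post("hackernews-200_000.ndjson", SyncMode::DontWait),
            document_post("hackernews-300_000.ndjson", SyncMode::WaitForTask),
        ];
        run_batch(client, &batch, assets, "bench/assets").await
    }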
+
+#[tracing::instrument(skip(client, command, assets, asset_folder), fields(command = %command))]
+pub async fn run(
+    client: Client,
+    mut command: Command,
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<()> {
+    // `mem::take` the body here to leave an empty body in its place, so that `command` is not partially moved out
+    let body = std::mem::take(&mut command.body)
+        .get(assets, asset_folder)
+        .with_context(|| format!("while getting body for command {command}"))?;
+
+    let request = client.request(command.method.into(), &command.route);
+
+    let request = if let Some((body, content_type)) = body {
+        request.body(body).header(reqwest::header::CONTENT_TYPE, content_type)
+    } else {
+        request
+    };
+
+    let response =
+        request.send().await.with_context(|| format!("error sending command: {}", command))?;
+
+    let code = response.status();
+    if code.is_client_error() {
+        tracing::error!(%command, %code, "error in workload file");
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response as JSON")
+            .context("parsing error in workload file when sending command")?;
+        bail!("error in workload file: server responded with error code {code} and '{response}'")
+    } else if code.is_server_error() {
+        tracing::error!(%command, %code, "server error");
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response as JSON")
+            .context("parsing server error when sending command")?;
+        bail!("server error: server responded with error code {code} and '{response}'")
+    }
+
+    Ok(())
+}
diff --git a/xtask/src/bench/dashboard.rs b/xtask/src/bench/dashboard.rs
new file mode 100644
index 000000000..833426207
--- /dev/null
+++ b/xtask/src/bench/dashboard.rs
@@ -0,0 +1,167 @@
+use std::collections::BTreeMap;
+
+use anyhow::{bail, Context};
+use serde_json::json;
+use tokio::signal::ctrl_c;
+use tokio::task::AbortHandle;
+use tracing_trace::processor::span_stats::CallStats;
+use uuid::Uuid;
+
+use super::client::Client;
+use super::env_info;
+use super::workload::Workload;
+
+pub async fn cancel_on_ctrl_c(
+    invocation_uuid: Uuid,
+    dashboard_client: Client,
+    abort_handle: AbortHandle,
+) {
+    tracing::info!("press Ctrl-C to cancel the invocation");
+    match ctrl_c().await {
+        Ok(()) => {
+            tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation");
+            mark_as_failed(dashboard_client, invocation_uuid, None).await;
+            abort_handle.abort();
+        }
+        Err(error) => tracing::warn!(
+            error = &error as &dyn std::error::Error,
+            "failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C"
+        ),
+    }
+}
+
+pub async fn mark_as_failed(
+    dashboard_client: Client,
+    invocation_uuid: Uuid,
+    failure_reason: Option<String>,
+) {
+    let response = dashboard_client
+        .post("cancel-invocation")
+        .json(&json!({
+            "invocation_uuid": invocation_uuid,
+            "failure_reason": failure_reason,
+        }))
+        .send()
+        .await;
+    let response = match response {
+        Ok(response) => response,
+        Err(response_error) => {
+            tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
+            return;
+        }
+    };
+
+    if !response.status().is_success() {
+        tracing::error!(
+            %invocation_uuid,
+            "could not mark invocation as failed: {}",
+            response.text().await.unwrap()
+        );
+        return;
+    }
+    tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
+}
+
+pub async fn send_machine_info(
+    dashboard_client: &Client,
+    env: &env_info::Environment,
+) -> anyhow::Result<()> {
+    let response = dashboard_client
+        .put("machine")
+        .json(&json!({"hostname": env.hostname}))
+        .send()
+        .await
+        .context("sending machine information")?;
+    if !response.status().is_success() {
+        bail!(
+            "could not send machine information: {} {}",
+            response.status(),
+            response.text().await.unwrap_or_else(|_| "unknown".into())
+        );
+    }
+    Ok(())
+}
+
+pub async fn create_invocation(
+    dashboard_client: &Client,
+    build_info: build_info::BuildInfo,
+    commit_message: &str,
+    env: env_info::Environment,
+    max_workloads: usize,
+    reason: Option<&str>,
+) -> anyhow::Result<Uuid> {
+    let response = dashboard_client
+        .put("invocation")
+        .json(&json!({
+            "commit": {
+                "sha1": build_info.commit_sha1,
+                "message": commit_message,
+                "commit_date": build_info.commit_timestamp,
+                "branch": build_info.branch,
+                "tag": build_info.describe.and_then(|describe| describe.as_tag()),
+            },
+            "machine_hostname": env.hostname,
+            "max_workloads": max_workloads,
+            "reason": reason
+        }))
+        .send()
+        .await
+        .context("sending invocation")?;
+    if !response.status().is_success() {
+        bail!(
+            "could not send new invocation: {}",
+            response.text().await.unwrap_or_else(|_| "unknown".into())
+        );
+    }
+    let invocation_uuid: Uuid =
+        response.json().await.context("could not deserialize invocation response as JSON")?;
+    Ok(invocation_uuid)
+}
+
+pub async fn create_workload(
+    dashboard_client: &Client,
+    invocation_uuid: Uuid,
+    workload: &Workload,
+) -> anyhow::Result<Uuid> {
+    let response = dashboard_client
+        .put("workload")
+        .json(&json!({
+            "invocation_uuid": invocation_uuid,
+            "name": &workload.name,
+            "max_runs": workload.run_count,
+        }))
+        .send()
+        .await
+        .context("could not create new workload")?;
+
+    if !response.status().is_success() {
+        bail!("creating new workload failed: {}", response.text().await.unwrap())
+    }
+
+    let workload_uuid: Uuid =
+        response.json().await.context("could not deserialize JSON as UUID")?;
+    Ok(workload_uuid)
+}
+
+pub async fn create_run(
+    dashboard_client: Client,
+    workload_uuid: Uuid,
+    report: &BTreeMap<String, CallStats>,
+) -> anyhow::Result<()> {
+    let response = dashboard_client
+        .put("run")
+        .json(&json!({
+            "workload_uuid": workload_uuid,
+            "data": report
+        }))
+        .send()
+        .await
+        .context("sending new run")?;
+    if !response.status().is_success() {
+        bail!(
+            "sending new run failed: {}",
+            response.text().await.unwrap_or_else(|_| "unknown".into())
+        )
+    }
+    Ok(())
+}
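Taken together, these dashboard calls follow a fixed nesting: one invocation per `cargo xtask bench` run, one workload per workload file, one run per iteration. A happy-path sketch (error reporting via `mark_as_failed` is omitted; the values are placeholders):

    use std::collections::BTreeMap;

    async fn report_one(
        dashboard: &Client,
        build_info: build_info::BuildInfo,
        env: env_info::Environment,
        workload: &Workload,
        stats: &BTreeMap<String, CallStats>,
    ) -> anyhow::Result<()> {
        send_machine_info(dashboard, &env).await?;
        let invocation_uuid =
            create_invocation(dashboard, build_info, "commit message", env, 1, None).await?;
        let workload_uuid = create_workload(dashboard, invocation_uuid, workload).await?;
        create_run(dashboard.clone(), workload_uuid, stats).await?;
        Ok(())
    }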
diff --git a/xtask/src/bench/meili_process.rs b/xtask/src/bench/meili_process.rs
new file mode 100644
index 000000000..99f6f4ea6
--- /dev/null
+++ b/xtask/src/bench/meili_process.rs
@@ -0,0 +1,112 @@
+use std::collections::BTreeMap;
+
+use anyhow::{bail, Context as _};
+
+use super::assets::Asset;
+use super::client::Client;
+use super::workload::Workload;
+
+pub async fn kill(mut meilisearch: tokio::process::Child) {
+    if let Err(error) = meilisearch.kill().await {
+        tracing::warn!(
+            error = &error as &dyn std::error::Error,
+            "while terminating Meilisearch server"
+        )
+    }
+}
+
+#[tracing::instrument]
+pub async fn build() -> anyhow::Result<()> {
+    let mut command = tokio::process::Command::new("cargo");
+    command.arg("build").arg("--release").arg("-p").arg("meilisearch");
+
+    command.kill_on_drop(true);
+
+    let mut builder = command.spawn().context("error building Meilisearch")?;
+
+    if !builder.wait().await.context("could not build Meilisearch")?.success() {
+        bail!("failed building Meilisearch")
+    }
+
+    Ok(())
+}
+
+#[tracing::instrument(skip(client, master_key, workload), fields(workload = workload.name))]
+pub async fn start(
+    client: &Client,
+    master_key: Option<&str>,
+    workload: &Workload,
+    asset_folder: &str,
+) -> anyhow::Result<tokio::process::Child> {
+    let mut command = tokio::process::Command::new("cargo");
+    command
+        .arg("run")
+        .arg("--release")
+        .arg("-p")
+        .arg("meilisearch")
+        .arg("--bin")
+        .arg("meilisearch")
+        .arg("--");
+
+    command.arg("--db-path").arg("./_xtask_benchmark.ms");
+    if let Some(master_key) = master_key {
+        command.arg("--master-key").arg(master_key);
+    }
+    command.arg("--experimental-enable-logs-route");
+
+    for extra_arg in workload.extra_cli_args.iter() {
+        command.arg(extra_arg);
+    }
+
+    command.kill_on_drop(true);
+
+    let mut meilisearch = command.spawn().context("Error starting Meilisearch")?;
+
+    wait_for_health(client, &mut meilisearch, &workload.assets, asset_folder).await?;
+
+    Ok(meilisearch)
+}
+
+async fn wait_for_health(
+    client: &Client,
+    meilisearch: &mut tokio::process::Child,
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<()> {
+    for i in 0..100 {
+        let res = super::command::run(client.clone(), health_command(), assets, asset_folder).await;
+        if res.is_ok() {
+            // check that this is actually the current Meilisearch instance that answered us
+            if let Some(exit_code) =
+                meilisearch.try_wait().context("cannot check Meilisearch server process status")?
+            {
+                tracing::error!("Got a health response from a different process");
+                bail!("Meilisearch server exited early with code {exit_code}");
+            }
+
+            return Ok(());
+        }
+        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
+        // check whether the Meilisearch instance exited early (cut the wait)
+        if let Some(exit_code) =
+            meilisearch.try_wait().context("cannot check Meilisearch server process status")?
+        {
+            bail!("Meilisearch server exited early with code {exit_code}");
+        }
+        tracing::debug!(attempt = i, "Waiting for Meilisearch to go up");
+    }
+    bail!("meilisearch is not responding")
+}
+
+fn health_command() -> super::command::Command {
+    super::command::Command {
+        route: "/health".into(),
+        method: super::client::Method::Get,
+        body: Default::default(),
+        synchronous: super::command::SyncMode::WaitForResponse,
+    }
+}
+
+pub fn delete_db() {
+    let _ = std::fs::remove_dir_all("./_xtask_benchmark.ms");
+}
diff --git a/xtask/src/bench/mod.rs b/xtask/src/bench/mod.rs
index cfc7c124f..62c11b604 100644
--- a/xtask/src/bench/mod.rs
+++ b/xtask/src/bench/mod.rs
@@ -1,20 +1,21 @@
+mod assets;
+mod client;
+mod command;
+mod dashboard;
 mod env_info;
+mod meili_process;
+mod workload;
 
-use std::collections::BTreeMap;
-use std::fmt::Display;
-use std::io::{Read, Seek, Write};
 use std::path::PathBuf;
 
-use anyhow::{bail, Context};
+use anyhow::Context;
 use clap::Parser;
-use futures_util::TryStreamExt;
-use serde::Deserialize;
-use serde_json::json;
-use sha2::Digest;
 use tracing_subscriber::fmt::format::FmtSpan;
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::Layer;
-use uuid::Uuid;
+
+use self::client::Client;
+use self::workload::Workload;
 
 pub fn default_http_addr() -> String {
     "127.0.0.1:7700".to_string()
@@ -35,62 +36,6 @@ pub fn default_dashboard_url() -> String {
     "http://localhost:9001".into()
 }
 
-#[derive(Debug, Clone)]
-pub struct Client {
-    base_url: Option<String>,
-    client: reqwest::Client,
-}
-
-impl Client {
-    pub fn new(
-        base_url: Option<String>,
-        api_key: Option<&str>,
-        timeout: Option<std::time::Duration>,
-    ) -> anyhow::Result<Self> {
-        let mut headers = reqwest::header::HeaderMap::new();
-        if let Some(api_key) = api_key {
-            headers.append(
-                reqwest::header::AUTHORIZATION,
-                reqwest::header::HeaderValue::from_str(&format!("Bearer {api_key}"))
-                    .context("Invalid authorization header")?,
-            );
-        }
-
-        let client = reqwest::ClientBuilder::new().default_headers(headers);
-        let client = if let Some(timeout) = timeout { client.timeout(timeout) } else { client };
-        let client = client.build()?;
-        Ok(Self { base_url, client })
-    }
-
-    pub fn request(&self, method: reqwest::Method, route: &str) -> reqwest::RequestBuilder {
-        if let Some(base_url) = &self.base_url {
-            if route.is_empty() {
-                self.client.request(method, base_url)
-            } else {
-                self.client.request(method, format!("{}/{}", base_url, route))
-            }
-        } else {
-            self.client.request(method, route)
-        }
-    }
-
-    pub fn get(&self, route: &str) -> reqwest::RequestBuilder {
-        self.request(reqwest::Method::GET, route)
-    }
-
-    pub fn put(&self, route: &str) -> reqwest::RequestBuilder {
-        self.request(reqwest::Method::PUT, route)
-    }
-
-    pub fn post(&self, route: &str) -> reqwest::RequestBuilder {
-        self.request(reqwest::Method::POST, route)
-    }
-
-    pub fn delete(&self, route: &str) -> reqwest::RequestBuilder {
-        self.request(reqwest::Method::DELETE, route)
-    }
-}
-
 /// Run benchmarks from a workload
 #[derive(Parser, Debug)]
 pub struct BenchDeriveArgs {
@@ -134,166 +79,11 @@ pub struct BenchDeriveArgs {
     reason: Option<String>,
 }
 
-#[derive(Deserialize)]
-pub struct Workload {
-    pub name: String,
-    pub run_count: u16,
-    pub extra_cli_args: Vec<String>,
-    pub assets: BTreeMap<String, Asset>,
-    pub commands: Vec<Command>,
-}
-
-#[derive(Deserialize, Clone)]
-pub struct Asset {
-    pub local_location: Option<String>,
-    pub remote_location: Option<String>,
-    #[serde(default)]
-    pub format: AssetFormat,
-    pub sha256: Option<String>,
-}
-
-#[derive(Deserialize, Default, Copy, Clone)]
-pub enum AssetFormat {
-    #[default]
-    Auto,
-    Json,
-    NdJson,
-    Raw,
-}
-impl AssetFormat {
-    fn to_content_type(self, filename: &str) -> &'static str {
-        match self {
-            AssetFormat::Auto => Self::auto_detect(filename).to_content_type(filename),
-            AssetFormat::Json => "application/json",
-            AssetFormat::NdJson => "application/x-ndjson",
-            AssetFormat::Raw => "application/octet-stream",
-        }
-    }
-
-    fn auto_detect(filename: &str) -> Self {
-        let path = std::path::Path::new(filename);
-        match path.extension().and_then(|extension| extension.to_str()) {
-            Some(extension) if extension.eq_ignore_ascii_case("json") => Self::Json,
-            Some(extension) if extension.eq_ignore_ascii_case("ndjson") => Self::NdJson,
-            extension => {
-                tracing::warn!(asset = filename, ?extension, "asset has format `Auto`, but extension was not recognized. Specify `Raw` format to suppress this warning.");
-                AssetFormat::Raw
-            }
-        }
-    }
-}
-
-#[derive(Clone, Deserialize)]
-pub struct Command {
-    pub route: String,
-    pub method: Method,
-    #[serde(default)]
-    pub body: Body,
-    #[serde(default)]
-    pub synchronous: SyncMode,
-}
-
-#[derive(Default, Clone, Deserialize)]
-#[serde(untagged)]
-pub enum Body {
-    Inline {
-        inline: serde_json::Value,
-    },
-    Asset {
-        asset: String,
-    },
-    #[default]
-    Empty,
-}
-
-impl Body {
-    pub fn get(
-        self,
-        assets: &BTreeMap<String, Asset>,
-        asset_folder: &str,
-    ) -> anyhow::Result<Option<(Vec<u8>, &'static str)>> {
-        Ok(match self {
-            Body::Inline { inline: body } => Some((
-                serde_json::to_vec(&body)
-                    .context("serializing to bytes")
-                    .context("while getting inline body")?,
-                "application/json",
-            )),
-            Body::Asset { asset: name } => Some({
-                let context = || format!("while getting body from asset '{name}'");
-                let (mut file, format) =
-                    fetch_asset(&name, assets, asset_folder).with_context(context)?;
-                let mut buf = Vec::new();
-                file.read_to_end(&mut buf).with_context(context)?;
-                (buf, format.to_content_type(&name))
-            }),
-            Body::Empty => None,
-        })
-    }
-}
-
-fn fetch_asset(
-    name: &str,
-    assets: &BTreeMap<String, Asset>,
-    asset_folder: &str,
-) -> anyhow::Result<(std::fs::File, AssetFormat)> {
-    let asset =
-        assets.get(name).with_context(|| format!("could not find asset with name '{name}'"))?;
-    let filename = if let Some(local_filename) = &asset.local_location {
-        local_filename.clone()
-    } else {
-        format!("{asset_folder}/{name}")
-    };
-
-    Ok((
-        std::fs::File::open(&filename)
-            .with_context(|| format!("could not open asset '{name}' at '{filename}'"))?,
-        asset.format,
-    ))
-}
-
-impl Display for Command {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:?} {} ({:?})", self.method, self.route, self.synchronous)
-    }
-}
-
-#[derive(Debug, Clone, Copy, Deserialize)]
-pub enum Method {
-    GET,
-    POST,
-    PATCH,
-    DELETE,
-    PUT,
-}
-
-impl From<Method> for reqwest::Method {
-    fn from(value: Method) -> Self {
-        match value {
-            Method::GET => Self::GET,
-            Method::POST => Self::POST,
-            Method::PATCH => Self::PATCH,
-            Method::DELETE => Self::DELETE,
-            Method::PUT => Self::PUT,
-        }
-    }
-}
-
-#[derive(Default, Debug, Clone, Copy,
-pub enum SyncMode {
-    DontWait,
-    #[default]
-    WaitForResponse,
-    WaitForTask,
-}
-
 pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
+    // setup logs
     let filter: tracing_subscriber::filter::Targets =
         args.log_filter.parse().context("invalid --log-filter")?;
 
-    let env = env_info::Environment::generate_from_current_config();
-    let build_info = build_info::BuildInfo::from_build();
-
     let subscriber = tracing_subscriber::registry().with(
         tracing_subscriber::fmt::layer()
             .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
@@ -301,9 +91,15 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
     );
     tracing::subscriber::set_global_default(subscriber).context("could not setup logging")?;
 
+    // fetch environment and build info
+    let env = env_info::Environment::generate_from_current_config();
+    let build_info = build_info::BuildInfo::from_build();
+
+    // tokio runtime
     let rt = tokio::runtime::Builder::new_current_thread().enable_io().enable_time().build()?;
     let _scope = rt.enter();
 
+    // setup clients
     let assets_client =
         Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h
 
@@ -328,55 +124,19 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
         Some(std::time::Duration::from_secs(60)),
     )?;
 
+    // enter runtime
+
     rt.block_on(async {
-        let response = dashboard_client
-            .put("machine")
-            .json(&json!({"hostname": env.hostname}))
-            .send()
-            .await
-            .context("sending machine information")?;
-        if !response.status().is_success() {
-            bail!(
-                "could not send machine information: {} {}",
-                response.status(),
-                response.text().await.unwrap_or_else(|_| "unknown".into())
-            );
-        }
+        dashboard::send_machine_info(&dashboard_client, &env).await?;
 
         let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap();
         let max_workloads = args.workload_file.len();
         let reason: Option<&str> = args.reason.as_deref();
-        let response = dashboard_client
-            .put("invocation")
-            .json(&json!({
-                "commit": {
-                    "sha1": build_info.commit_sha1,
-                    "message": commit_message,
-                    "commit_date": build_info.commit_timestamp,
-                    "branch": build_info.branch,
-                    "tag": build_info.describe.and_then(|describe| describe.as_tag()),
-                },
-                "machine_hostname": env.hostname,
-                "max_workloads": max_workloads,
-                "reason": reason
-            }))
-            .send()
-            .await
-            .context("sending invocation")?;
-
-        if !response.status().is_success() {
-            bail!(
-                "could not send new invocation: {}",
-                response.text().await.unwrap_or_else(|_| "unknown".into())
-            );
-        }
-
-        let invocation_uuid: Uuid =
-            response.json().await.context("could not deserialize invocation response as JSON")?;
-
-
+        let invocation_uuid = dashboard::create_invocation(&dashboard_client, build_info, commit_message, env, max_workloads, reason).await?;
 
         tracing::info!(workload_count = args.workload_file.len(), "handling workload files");
+
+        // main task
         let workload_runs = tokio::spawn(
             {
                 let dashboard_client = dashboard_client.clone();
@@ -388,7 +148,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
                     )
                     .with_context(|| format!("error parsing {} as JSON", workload_file.display()))?;
 
-                    run_workload(
+                    workload::execute(
                         &assets_client,
                         &dashboard_client,
                         &logs_client,
@@ -403,26 +163,14 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
             Ok::<(), anyhow::Error>(())
         }});
 
+        // handle ctrl-c
        let abort_handle = workload_runs.abort_handle();
-
        tokio::spawn({
            let dashboard_client = dashboard_client.clone();
-            async move {
-                tracing::info!("press Ctrl-C to cancel the invocation");
-                match tokio::signal::ctrl_c().await {
-                    Ok(()) => {
-                        tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation");
-                        mark_as_failed(dashboard_client, invocation_uuid, None).await;
-                        abort_handle.abort();
-                    }
-                    Err(error) => tracing::warn!(
-                        error = &error as &dyn std::error::Error,
-                        "failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C"
-                    ),
-                }
-            }
+            dashboard::cancel_on_ctrl_c(invocation_uuid, dashboard_client, abort_handle)
         });
 
+        // wait for the end of the main task, handle result
         match workload_runs.await {
             Ok(Ok(_)) => {
                 tracing::info!("Success");
@@ -430,7 +178,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
             }
             Ok(Err(error)) => {
                 tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard");
-                mark_as_failed(dashboard_client, invocation_uuid, Some(error.to_string())).await;
+                dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some(error.to_string())).await;
                 tracing::warn!(%invocation_uuid, "invocation marked as failed following error");
                 Err(error)
             },
@@ -438,7 +186,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
                 match join_error.try_into_panic() {
                     Ok(panic) => {
                         tracing::error!("invocation panicked, attempting to report the failure to dashboard");
-                        mark_as_failed(dashboard_client, invocation_uuid, Some("Panicked".into())).await;
+                        dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some("Panicked".into())).await;
                         std::panic::resume_unwind(panic)
                     }
                     Err(_) => {
@@ -453,707 +201,3 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
 
     Ok(())
 }
-
-async fn mark_as_failed(
-    dashboard_client: Client,
-    invocation_uuid: Uuid,
-    failure_reason: Option<String>,
-) {
-    let response = dashboard_client
-        .post("cancel-invocation")
-        .json(&json!({
-            "invocation_uuid": invocation_uuid,
-            "failure_reason": failure_reason,
-        }))
-        .send()
-        .await;
-    let response = match response {
-        Ok(response) => response,
-        Err(response_error) => {
-            tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
-            return;
-        }
-    };
-
-    if !response.status().is_success() {
-        tracing::error!(
-            %invocation_uuid,
-            "could not mark invocation as failed: {}",
-            response.text().await.unwrap()
-        );
-        return;
-    }
-    tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
-}
-
-#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
-#[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))]
-async fn run_workload(
-    assets_client: &Client,
-    dashboard_client: &Client,
-    logs_client: &Client,
-    meili_client: &Client,
-    invocation_uuid: Uuid,
-    master_key: Option<&str>,
-    workload: Workload,
-    args: &BenchDeriveArgs,
-) -> anyhow::Result<()> {
-    fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
-
-    let response = dashboard_client
-        .put("workload")
-        .json(&json!({
-            "invocation_uuid": invocation_uuid,
-            "name": &workload.name,
-            "max_runs": workload.run_count,
-        }))
-        .send()
-        .await
-        .context("could not create new workload")?;
-
-    if !response.status().is_success() {
-        bail!("creating new workload failed: {}", response.text().await.unwrap())
-    }
-
-    let workload_uuid: Uuid =
-        response.json().await.context("could not deserialize JSON as UUID")?;
-
-    let mut tasks = Vec::new();
-
-    for i in 0..workload.run_count {
-        tasks.push(
-            run_workload_run(
-                dashboard_client,
-                logs_client,
-                meili_client,
-                workload_uuid,
-                master_key,
-                &workload,
-                args,
-                i,
-            )
-            .await?,
-        );
-    }
-
-    let mut reports = Vec::with_capacity(workload.run_count as usize);
-
-    for task in tasks {
-        reports.push(
-            task.await
-                .context("task panicked while processing report")?
-                .context("task failed while processing report")?,
-        );
-    }
-
-    tracing::info!(workload = workload.name, "Successful workload");
-
-    Ok(())
-}
-
-#[tracing::instrument(skip(client, assets), fields(asset_count = assets.len()))]
-async fn fetch_assets(
-    client: &Client,
-    assets: &BTreeMap<String, Asset>,
-    asset_folder: &str,
-) -> anyhow::Result<()> {
-    let mut download_tasks = tokio::task::JoinSet::new();
-    for (name, asset) in assets {
-        // trying local
-        if let Some(local) = &asset.local_location {
-            match std::fs::File::open(local) {
-                Ok(file) => {
-                    if check_sha256(name, asset, file)? {
-                        continue;
-                    } else {
-                        tracing::warn!(asset = name, file = local, "found local resource for asset but hash differed, skipping to asset store");
-                    }
-                }
-                Err(error) => match error.kind() {
-                    std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
-                    }
-                    _ => tracing::warn!(
-                        error = &error as &dyn std::error::Error,
-                        "error checking local resource, skipping to asset store"
-                    ),
-                },
-            }
-        }
-
-        // checking asset store
-        let store_filename = format!("{}/{}", asset_folder, name);
-
-        match std::fs::File::open(&store_filename) {
-            Ok(file) => {
-                if check_sha256(name, asset, file)? {
-                    continue;
-                } else {
-                    tracing::warn!(asset = name, file = store_filename, "found resource for asset in asset store, but hash differed, skipping to remote method");
-                }
-            }
-            Err(error) => match error.kind() {
-                std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
-                }
-                _ => tracing::warn!(
-                    error = &error as &dyn std::error::Error,
-                    "error checking resource in store, skipping to remote method"
-                ),
-            },
-        }
-
-        // downloading remote
-        match &asset.remote_location {
-            Some(location) => {
-                std::fs::create_dir_all(asset_folder).with_context(|| format!("could not create asset folder at {asset_folder}"))?;
-                download_tasks.spawn({
-                    let client = client.clone();
-                    let name = name.to_string();
-                    let location = location.to_string();
-                    let store_filename = store_filename.clone();
-                    let asset = asset.clone();
-                    download_asset(client, name, asset, location, store_filename)});
-            },
-            None => bail!("asset {name} has no remote location, but was not found locally or in the asset store"),
-        }
-    }
-
-    while let Some(res) = download_tasks.join_next().await {
-        res.context("download task panicked")?.context("download task failed")?;
-    }
-
-    Ok(())
-}
-
-fn check_sha256(name: &str, asset: &Asset, mut file: std::fs::File) -> anyhow::Result<bool> {
-    let mut bytes = Vec::new();
-    file.read_to_end(&mut bytes).with_context(|| format!("hashing file for asset {name}"))?;
-    let mut file_hash = sha2::Sha256::new();
-    file_hash.update(&bytes);
-    let file_hash = file_hash.finalize();
-    let file_hash = format!("{:x}", file_hash);
-    tracing::debug!(hash = file_hash, "hashed local file");
-
-    Ok(match &asset.sha256 {
-        Some(hash) => {
-            tracing::debug!(hash, "hash from workload");
-            if hash.to_ascii_lowercase() == file_hash {
-                true
-            } else {
-                tracing::warn!(
-                    file_hash,
-                    asset_hash = hash.to_ascii_lowercase(),
-                    "hashes don't match"
-                );
-                false
-            }
-        }
-        None => {
-            tracing::warn!(sha256 = file_hash, "Skipping hash for asset {name} that doesn't have one. Please add it to workload file");
-            true
-        }
-    })
-}
-
-#[tracing::instrument(skip(client, asset, name), fields(asset = name))]
-async fn download_asset(
-    client: Client,
-    name: String,
-    asset: Asset,
-    src: String,
-    dest_filename: String,
-) -> anyhow::Result<()> {
-    let context = || format!("failure downloading asset {name} from {src}");
-
-    let response = client.get(&src).send().await.with_context(context)?;
-
-    let file = std::fs::File::options()
-        .create(true)
-        .truncate(true)
-        .write(true)
-        .read(true)
-        .open(&dest_filename)
-        .with_context(|| format!("creating destination file {dest_filename}"))
-        .with_context(context)?;
-
-    let mut dest = std::io::BufWriter::new(
-        file.try_clone().context("cloning I/O handle").with_context(context)?,
-    );
-
-    let total_len: Option<u64> = response
-        .headers()
-        .get(reqwest::header::CONTENT_LENGTH)
-        .and_then(|value| value.to_str().ok())
-        .and_then(|value| value.parse().ok());
-
-    let progress = tokio::spawn({
-        let name = name.clone();
-        async move {
-            loop {
-                match file.metadata().context("could not get file metadata") {
-                    Ok(metadata) => {
-                        let len = metadata.len();
-                        tracing::info!(
-                            asset = name,
-                            downloaded_bytes = len,
-                            total_bytes = total_len,
-                            "asset download in progress"
-                        );
-                    }
-                    Err(error) => {
-                        tracing::warn!(%error, "could not get file metadata");
-                    }
-                }
-                tokio::time::sleep(std::time::Duration::from_secs(60)).await;
-            }
-        }
-    });
-
-    let writing_context = || format!("while writing to destination file at {dest_filename}");
-
-    let mut response = response.bytes_stream();
-
-    while let Some(bytes) =
-        response.try_next().await.context("while downloading file").with_context(context)?
-    {
-        dest.write_all(&bytes).with_context(writing_context).with_context(context)?;
-    }
-
-    progress.abort();
-
-    let mut file = dest.into_inner().with_context(writing_context).with_context(context)?;
-
-    file.rewind().context("while rewinding asset file")?;
-
-    if !check_sha256(&name, &asset, file)? {
-        bail!("asset '{name}': sha256 mismatch for file {dest_filename} downloaded from {src}")
-    }
-
-    Ok(())
-}
-
-#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
-#[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))]
-async fn run_workload_run(
-    dashboard_client: &Client,
-    logs_client: &Client,
-    meili_client: &Client,
-    workload_uuid: Uuid,
-    master_key: Option<&str>,
-    workload: &Workload,
-    args: &BenchDeriveArgs,
-    run_number: u16,
-) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
-    delete_db();
-    build_meilisearch().await?;
-    let meilisearch =
-        start_meilisearch(meili_client, master_key, workload, &args.asset_folder).await?;
-    let processor = run_commands(
-        dashboard_client,
-        logs_client,
-        meili_client,
-        workload_uuid,
-        workload,
-        args,
-        run_number,
-    )
-    .await?;
-
-    kill_meilisearch(meilisearch).await;
-
-    tracing::info!(run_number, "Successful run");
-
-    Ok(processor)
-}
-
-async fn kill_meilisearch(mut meilisearch: tokio::process::Child) {
-    if let Err(error) = meilisearch.kill().await {
-        tracing::warn!(
-            error = &error as &dyn std::error::Error,
-            "while terminating Meilisearch server"
-        )
-    }
-}
-
-#[tracing::instrument]
-async fn build_meilisearch() -> anyhow::Result<()> {
-    let mut command = tokio::process::Command::new("cargo");
-    command.arg("build").arg("--release").arg("-p").arg("meilisearch");
-
-    command.kill_on_drop(true);
-
-    let mut builder = command.spawn().context("error building Meilisearch")?;
-
-    if !builder.wait().await.context("could not build Meilisearch")?.success() {
-        bail!("failed building Meilisearch")
-    }
-
-    Ok(())
-}
-
-#[tracing::instrument(skip(client, master_key, workload), fields(workload = workload.name))]
-async fn start_meilisearch(
-    client: &Client,
-    master_key: Option<&str>,
-    workload: &Workload,
-    asset_folder: &str,
-) -> anyhow::Result<tokio::process::Child> {
-    let mut command = tokio::process::Command::new("cargo");
-    command
-        .arg("run")
-        .arg("--release")
-        .arg("-p")
-        .arg("meilisearch")
-        .arg("--bin")
-        .arg("meilisearch")
-        .arg("--");
-
-    command.arg("--db-path").arg("./_xtask_benchmark.ms");
-    if let Some(master_key) = master_key {
-        command.arg("--master-key").arg(master_key);
-    }
-    command.arg("--experimental-enable-logs-route");
-
-    for extra_arg in workload.extra_cli_args.iter() {
-        command.arg(extra_arg);
-    }
-
-    command.kill_on_drop(true);
-
-    let mut meilisearch = command.spawn().context("Error starting Meilisearch")?;
-
-    wait_for_health(client, &mut meilisearch, &workload.assets, asset_folder).await?;
-
-    Ok(meilisearch)
-}
-
-async fn wait_for_health(
-    client: &Client,
-    meilisearch: &mut tokio::process::Child,
-    assets: &BTreeMap<String, Asset>,
-    asset_folder: &str,
-) -> anyhow::Result<()> {
-    for i in 0..100 {
-        let res = run_command(client.clone(), health_command(), assets, asset_folder).await;
-        if res.is_ok() {
-            // check that this is actually the current Meilisearch instance that answered us
-            if let Some(exit_code) =
-                meilisearch.try_wait().context("cannot check Meilisearch server process status")?
-            {
-                tracing::error!("Got an health response from a different process");
-                bail!("Meilisearch server exited early with code {exit_code}");
-            }
-
-            return Ok(());
-        }
-        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
-        // check whether the Meilisearch instance exited early (cut the wait)
-        if let Some(exit_code) =
-            meilisearch.try_wait().context("cannot check Meilisearch server process status")?
-        {
-            bail!("Meilisearch server exited early with code {exit_code}");
-        }
-        tracing::debug!(attempt = i, "Waiting for Meilisearch to go up");
-    }
-    bail!("meilisearch is not responding")
-}
-
-fn health_command() -> Command {
-    Command {
-        route: "/health".into(),
-        method: Method::GET,
-        body: Default::default(),
-        synchronous: SyncMode::WaitForResponse,
-    }
-}
-
-fn delete_db() {
-    let _ = std::fs::remove_dir_all("./_xtask_benchmark.ms");
-}
-
-async fn run_commands(
-    dashboard_client: &Client,
-    logs_client: &Client,
-    meili_client: &Client,
-    workload_uuid: Uuid,
-    workload: &Workload,
-    args: &BenchDeriveArgs,
-    run_number: u16,
-) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
-    let report_folder = &args.report_folder;
-    let workload_name = &workload.name;
-
-    std::fs::create_dir_all(report_folder)
-        .with_context(|| format!("could not create report directory at {report_folder}"))?;
-
-    let trace_filename = format!("{report_folder}/{workload_name}-{run_number}-trace.json");
-    let report_filename = format!("{report_folder}/{workload_name}-{run_number}-report.json");
-
-    let report_handle = start_report(logs_client, trace_filename).await?;
-
-    for batch in workload
-        .commands
-        .as_slice()
-        .split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
-    {
-        run_batch(meili_client, batch, &workload.assets, &args.asset_folder).await?;
-    }
-
-    let processor =
-        stop_report(dashboard_client, logs_client, workload_uuid, report_filename, report_handle)
-            .await?;
-
-    Ok(processor)
-}
-
-async fn stop_report(
-    dashboard_client: &Client,
-    logs_client: &Client,
-    workload_uuid: Uuid,
-    filename: String,
-    report_handle: tokio::task::JoinHandle<anyhow::Result<std::fs::File>>,
-) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
-    let response = logs_client.delete("").send().await.context("while stopping report")?;
-    if !response.status().is_success() {
-        bail!("received HTTP {} while stopping report", response.status())
-    }
-
-    let mut file = tokio::time::timeout(std::time::Duration::from_secs(1000), report_handle)
-        .await
-        .context("while waiting for the end of the report")?
-        .context("report writing task panicked")?
-        .context("while writing report")?;
-
-    file.rewind().context("while rewinding report file")?;
-
-    let process_handle = tokio::task::spawn({
-        let dashboard_client = dashboard_client.clone();
-        async move {
-            let span = tracing::info_span!("processing trace to report", filename);
-            let _guard = span.enter();
-            let report = tracing_trace::processor::span_stats::to_call_stats(
-                tracing_trace::TraceReader::new(std::io::BufReader::new(file)),
-            )
-            .context("could not convert trace to report")?;
-            let context = || format!("writing report to {filename}");
-
-            let response = dashboard_client
-                .put("run")
-                .json(&json!({
-                    "workload_uuid": workload_uuid,
-                    "data": report
-                }))
-                .send()
-                .await
-                .context("sending new run")?;
-
-            if !response.status().is_success() {
-                bail!(
-                    "sending new run failed: {}",
-                    response.text().await.unwrap_or_else(|_| "unknown".into())
-                )
-            }
-
-            let mut output_file = std::io::BufWriter::new(
-                std::fs::File::options()
-                    .create(true)
-                    .truncate(true)
-                    .write(true)
-                    .read(true)
-                    .open(&filename)
-                    .with_context(context)?,
-            );
-
-            for (key, value) in report {
-                serde_json::to_writer(&mut output_file, &json!({key: value}))
-                    .context("serializing span stat")?;
-                writeln!(&mut output_file).with_context(context)?;
-            }
-            output_file.flush().with_context(context)?;
-            let mut output_file = output_file.into_inner().with_context(context)?;
-
-            output_file.rewind().context("could not rewind output_file").with_context(context)?;
-
-            tracing::info!("success");
-            Ok(output_file)
-        }
-    });
-
-    Ok(process_handle)
-}
-
-async fn start_report(
-    logs_client: &Client,
-    filename: String,
-) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
-    let report_file = std::fs::File::options()
-        .create(true)
-        .truncate(true)
-        .write(true)
-        .read(true)
-        .open(&filename)
-        .with_context(|| format!("could not create file at {filename}"))?;
-    let mut report_file = std::io::BufWriter::new(report_file);
-
-    let response = logs_client
-        .post("")
-        .json(&json!({
-            "mode": "profile",
-            "target": "indexing::=trace"
-        }))
-        .send()
-        .await
-        .context("failed to start report")?;
-
-    let code = response.status();
-    if code.is_client_error() {
-        tracing::error!(%code, "request error when trying to start report");
-        let response: serde_json::Value = response
-            .json()
-            .await
-            .context("could not deserialize response as JSON")
-            .context("response error when trying to start report")?;
-        bail!(
-            "request error when trying to start report: server responded with error code {code} and '{response}'"
-        )
-    } else if code.is_server_error() {
-        tracing::error!(%code, "server error when trying to start report");
-        let response: serde_json::Value = response
-            .json()
-            .await
-            .context("could not deserialize response as JSON")
-            .context("response error trying to start report")?;
-        bail!("server error when trying to start report: server responded with error code {code} and '{response}'")
-    }
-
-    Ok(tokio::task::spawn(async move {
-        let mut stream = response.bytes_stream();
-        while let Some(bytes) = stream.try_next().await.context("while waiting for report")? {
-            report_file
-                .write_all(&bytes)
-                .with_context(|| format!("while writing report to {filename}"))?;
-        }
-        report_file.into_inner().with_context(|| format!("while writing report to {filename}"))
-    }))
-}
-
-async fn run_batch(
-    client: &Client,
-    batch: &[Command],
-    assets: &BTreeMap<String, Asset>,
-    asset_folder: &str,
-) -> anyhow::Result<()> {
-    let [.., last] = batch else { return Ok(()) };
-    let sync = last.synchronous;
-
-    let mut tasks = tokio::task::JoinSet::new();
-
-    for command in batch {
-        // FIXME: you probably don't want to copy assets everytime here
-        tasks.spawn({
-            let client = client.clone();
-            let command = command.clone();
-            let assets = assets.clone();
-            let asset_folder = asset_folder.to_owned();
-
-            async move { run_command(client, command, &assets, &asset_folder).await }
-        });
-    }
-
-    while let Some(result) = tasks.join_next().await {
-        result
-            .context("panicked while executing command")?
-            .context("error while executing command")?;
-    }
-
-    match sync {
-        SyncMode::DontWait => {}
-        SyncMode::WaitForResponse => {}
-        SyncMode::WaitForTask => wait_for_tasks(client).await?,
-    }
-
-    Ok(())
-}
-
-async fn wait_for_tasks(client: &Client) -> anyhow::Result<()> {
-    loop {
-        let response = client
-            .get("tasks?statuses=enqueued,processing")
-            .send()
-            .await
-            .context("could not wait for tasks")?;
-        let response: serde_json::Value = response
-            .json()
-            .await
-            .context("could not deserialize response to JSON")
-            .context("could not wait for tasks")?;
-        match response.get("total") {
-            Some(serde_json::Value::Number(number)) => {
-                let number = number.as_u64().with_context(|| {
-                    format!("waiting for tasks: could not parse 'total' as integer, got {}", number)
-                })?;
-                if number == 0 {
-                    break;
-                } else {
-                    tokio::time::sleep(std::time::Duration::from_secs(1)).await;
-                    continue;
-                }
-            }
-            Some(thing_else) => {
-                bail!(format!(
-                    "waiting for tasks: could not parse 'total' as a number, got '{thing_else}'"
-                ))
-            }
-            None => {
-                bail!(format!(
-                    "waiting for tasks: expected response to contain 'total', got '{response}'"
-                ))
-            }
-        }
-    }
-    Ok(())
-}
-
-#[tracing::instrument(skip(client, command, assets, asset_folder), fields(command = %command))]
-async fn run_command(
-    client: Client,
-    mut command: Command,
-    assets: &BTreeMap<String, Asset>,
-    asset_folder: &str,
-) -> anyhow::Result<()> {
-    // memtake the body here to leave an empty body in its place, so that command is not partially moved-out
-    let body = std::mem::take(&mut command.body)
-        .get(assets, asset_folder)
-        .with_context(|| format!("while getting body for command {command}"))?;
-
-    let request = client.request(command.method.into(), &command.route);
-
-    let request = if let Some((body, content_type)) = body {
-        request.body(body).header(reqwest::header::CONTENT_TYPE, content_type)
-    } else {
-        request
-    };
-
-    let response =
-        request.send().await.with_context(|| format!("error sending command: {}", command))?;
-
-    let code = response.status();
-    if code.is_client_error() {
-        tracing::error!(%command, %code, "error in workload file");
-        let response: serde_json::Value = response
-            .json()
-            .await
-            .context("could not deserialize response as JSON")
-            .context("parsing error in workload file when sending command")?;
-        bail!("error in workload file: server responded with error code {code} and '{response}'")
-    } else if code.is_server_error() {
-        tracing::error!(%command, %code, "server error");
-        let response: serde_json::Value = response
-            .json()
-            .await
-            .context("could not deserialize response as JSON")
-            .context("parsing server error when sending command")?;
-        bail!("server error: server responded with error code {code} and '{response}'")
-    }
-
-    Ok(())
-}
diff --git a/xtask/src/bench/workload.rs b/xtask/src/bench/workload.rs
new file mode 100644
index 000000000..b3e952f29
--- /dev/null
+++ b/xtask/src/bench/workload.rs
@@ -0,0 +1,262 @@
+use std::collections::BTreeMap;
+use std::fs::File;
+use std::io::{Seek as _, Write as _};
+
+use anyhow::{bail, Context as _};
+use futures_util::TryStreamExt as _;
+use serde::Deserialize;
+use serde_json::json;
+use tokio::task::JoinHandle;
+use uuid::Uuid;
+
+use super::assets::Asset;
+use super::client::Client;
+use super::command::SyncMode;
+use super::BenchDeriveArgs;
+use crate::bench::{assets, dashboard, meili_process};
+
+#[derive(Deserialize)]
+pub struct Workload {
+    pub name: String,
+    pub run_count: u16,
+    pub extra_cli_args: Vec<String>,
+    pub assets: BTreeMap<String, Asset>,
+    pub commands: Vec<super::command::Command>,
+}
+
+async fn run_commands(
+    dashboard_client: &Client,
+    logs_client: &Client,
+    meili_client: &Client,
+    workload_uuid: Uuid,
+    workload: &Workload,
+    args: &BenchDeriveArgs,
+    run_number: u16,
+) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
+    let report_folder = &args.report_folder;
+    let workload_name = &workload.name;
+
+    std::fs::create_dir_all(report_folder)
+        .with_context(|| format!("could not create report directory at {report_folder}"))?;
+
+    let trace_filename = format!("{report_folder}/{workload_name}-{run_number}-trace.json");
+    let report_filename = format!("{report_folder}/{workload_name}-{run_number}-report.json");
+
+    let report_handle = start_report(logs_client, trace_filename).await?;
+
+    for batch in workload
+        .commands
+        .as_slice()
+        .split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
+    {
+        super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder)
+            .await?;
+    }
+
+    let processor =
+        stop_report(dashboard_client, logs_client, workload_uuid, report_filename, report_handle)
+            .await?;
+
+    Ok(processor)
+}
+
+#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
+#[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))]
+pub async fn execute(
+    assets_client: &Client,
+    dashboard_client: &Client,
+    logs_client: &Client,
+    meili_client: &Client,
+    invocation_uuid: Uuid,
+    master_key: Option<&str>,
+    workload: Workload,
+    args: &BenchDeriveArgs,
+) -> anyhow::Result<()> {
+    assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
+
+    let workload_uuid =
+        dashboard::create_workload(dashboard_client, invocation_uuid, &workload).await?;
+
+    let mut tasks = Vec::new();
+
+    for i in 0..workload.run_count {
+        tasks.push(
+            execute_run(
+                dashboard_client,
+                logs_client,
+                meili_client,
+                workload_uuid,
+                master_key,
+                &workload,
+                args,
+                i,
+            )
+            .await?,
+        );
+    }
+
+    let mut reports = Vec::with_capacity(workload.run_count as usize);
+
+    for task in tasks {
+        reports.push(
+            task.await
+                .context("task panicked while processing report")?
+                .context("task failed while processing report")?,
+        );
+    }
+
+    tracing::info!(workload = workload.name, "Successful workload");
+
+    Ok(())
+}
+
+#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
+#[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))]
+async fn execute_run(
+    dashboard_client: &Client,
+    logs_client: &Client,
+    meili_client: &Client,
+    workload_uuid: Uuid,
+    master_key: Option<&str>,
+    workload: &Workload,
+    args: &BenchDeriveArgs,
+    run_number: u16,
+) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
+    meili_process::delete_db();
+
+    meili_process::build().await?;
+    let meilisearch =
+        meili_process::start(meili_client, master_key, workload, &args.asset_folder).await?;
+
+    let processor = run_commands(
+        dashboard_client,
+        logs_client,
+        meili_client,
+        workload_uuid,
+        workload,
+        args,
+        run_number,
+    )
+    .await?;
+
+    meili_process::kill(meilisearch).await;
+
+    tracing::info!(run_number, "Successful run");
+
+    Ok(processor)
+}
+
+async fn start_report(
+    logs_client: &Client,
+    filename: String,
+) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
+    let report_file = std::fs::File::options()
+        .create(true)
+        .truncate(true)
+        .write(true)
+        .read(true)
+        .open(&filename)
+        .with_context(|| format!("could not create file at {filename}"))?;
+    let mut report_file = std::io::BufWriter::new(report_file);
+
+    let response = logs_client
+        .post("")
+        .json(&json!({
+            "mode": "profile",
+            "target": "indexing::=trace"
+        }))
+        .send()
+        .await
+        .context("failed to start report")?;
+
+    let code = response.status();
+    if code.is_client_error() {
+        tracing::error!(%code, "request error when trying to start report");
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response as JSON")
+            .context("response error when trying to start report")?;
+        bail!(
+            "request error when trying to start report: server responded with error code {code} and '{response}'"
+        )
+    } else if code.is_server_error() {
+        tracing::error!(%code, "server error when trying to start report");
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response as JSON")
+            .context("response error trying to start report")?;
+        bail!("server error when trying to start report: server responded with error code {code} and '{response}'")
+    }
+
+    Ok(tokio::task::spawn(async move {
+        let mut stream = response.bytes_stream();
+        while let Some(bytes) = stream.try_next().await.context("while waiting for report")? {
+            report_file
+                .write_all(&bytes)
+                .with_context(|| format!("while writing report to {filename}"))?;
+        }
+        report_file.into_inner().with_context(|| format!("while writing report to {filename}"))
+    }))
+}
+
+async fn stop_report(
+    dashboard_client: &Client,
+    logs_client: &Client,
+    workload_uuid: Uuid,
+    filename: String,
+    report_handle: tokio::task::JoinHandle<anyhow::Result<std::fs::File>>,
+) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
+    let response = logs_client.delete("").send().await.context("while stopping report")?;
+    if !response.status().is_success() {
+        bail!("received HTTP {} while stopping report", response.status())
+    }
+
+    let mut file = tokio::time::timeout(std::time::Duration::from_secs(1000), report_handle)
+        .await
+        .context("while waiting for the end of the report")?
+        .context("report writing task panicked")?
+        .context("while writing report")?;
+
+    file.rewind().context("while rewinding report file")?;
+
+    let process_handle = tokio::task::spawn({
+        let dashboard_client = dashboard_client.clone();
+        async move {
+            let span = tracing::info_span!("processing trace to report", filename);
+            let _guard = span.enter();
+            let report = tracing_trace::processor::span_stats::to_call_stats(
+                tracing_trace::TraceReader::new(std::io::BufReader::new(file)),
+            )
+            .context("could not convert trace to report")?;
+            let context = || format!("writing report to {filename}");
+
+            dashboard::create_run(dashboard_client, workload_uuid, &report).await?;
+
+            let mut output_file = std::io::BufWriter::new(
+                std::fs::File::options()
+                    .create(true)
+                    .truncate(true)
+                    .write(true)
+                    .read(true)
+                    .open(&filename)
+                    .with_context(context)?,
+            );
+
+            for (key, value) in report {
+                serde_json::to_writer(&mut output_file, &json!({key: value}))
+                    .context("serializing span stat")?;
+                writeln!(&mut output_file).with_context(context)?;
+            }
+            output_file.flush().with_context(context)?;
+            let mut output_file = output_file.into_inner().with_context(context)?;
+
+            output_file.rewind().context("could not rewind output_file").with_context(context)?;
+
+            Ok(output_file)
+        }
+    });
+
+    Ok(process_handle)
+}
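A note on the batching behaviour that the patch above moves verbatim from mod.rs into workload.rs: `run_commands` splits the command list with `slice::split_inclusive`, so consecutive `DontWait` commands are grouped together with the first blocking command that follows them, and that closing command's sync mode decides how the batch is awaited. A minimal standalone sketch of the grouping (the `SyncMode` enum is re-declared here without its serde derives, and the command values are hypothetical, not taken from a real workload file):

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum SyncMode {
        DontWait,
        WaitForResponse,
        WaitForTask,
    }

    fn main() {
        let commands =
            [SyncMode::DontWait, SyncMode::DontWait, SyncMode::WaitForTask, SyncMode::WaitForResponse];
        // A batch is closed by the first command that is not `DontWait`;
        // run_batch then spawns all commands of a batch concurrently and
        // waits according to the sync mode of the batch's last command.
        let batches: Vec<&[SyncMode]> = commands
            .split_inclusive(|mode| !matches!(mode, SyncMode::DontWait))
            .collect();
        assert_eq!(batches, [&commands[..3], &commands[3..]]);
    }
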
From 15c38dca78a9dd1377715a4aecca4f762aebdc02 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Tue, 5 Mar 2024 14:44:48 +0100
Subject: [PATCH 10/12] Output RFC 3339 dates where we can

Co-authored-by: Tamo
---
 meilisearch/src/main.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index 79ca7ec80..35658fc92 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -195,7 +195,7 @@ pub fn print_launch_resume(
         build_info
             .commit_timestamp
             .and_then(|commit_timestamp| commit_timestamp
-                .format(&time::format_description::well_known::Iso8601::DEFAULT)
+                .format(&time::format_description::well_known::Rfc3339::DEFAULT)
                 .ok())
             .unwrap_or("unknown".into())
     );

From 663629a9d60fbace1e498fe72f7c40bbfcdac998 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Tue, 5 Mar 2024 14:45:06 +0100
Subject: [PATCH 11/12] Remove unused build dependency from xtask

Co-authored-by: Tamo
---
 xtask/Cargo.toml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/xtask/Cargo.toml b/xtask/Cargo.toml
index 0df8161ce..562dfddb3 100644
--- a/xtask/Cargo.toml
+++ b/xtask/Cargo.toml
@@ -42,6 +42,3 @@ tracing = "0.1.40"
 tracing-subscriber = "0.3.18"
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
 uuid = { version = "1.7.0", features = ["v7", "serde"] }
-
-[build-dependencies]
-anyhow = "1.0.79"

From 7408db2a46a3620d21b5c7f414c71431d1343ff9 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Tue, 5 Mar 2024 14:56:48 +0100
Subject: [PATCH 12/12] Meilisearch: fix date formatting

---
 meilisearch/src/main.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index 35658fc92..3451325b2 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -195,7 +195,7 @@ pub fn print_launch_resume(
         build_info
             .commit_timestamp
             .and_then(|commit_timestamp| commit_timestamp
-                .format(&time::format_description::well_known::Rfc3339::DEFAULT)
+                .format(&time::format_description::well_known::Rfc3339)
                 .ok())
            .unwrap_or("unknown".into())
     );
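A closing note on the two date-formatting patches (10 and 12): in the `time` crate, `Iso8601` is generic over a const-encoded configuration and therefore exposes an `Iso8601::DEFAULT` associated constant, whereas `Rfc3339` is a plain unit struct with no `DEFAULT`, so the spelling introduced in patch 10 does not compile and patch 12 passes the unit value itself. A standalone sketch of both working calls (not part of the patches; assumes the `time` crate with its `formatting` feature enabled):

    use time::format_description::well_known::{Iso8601, Rfc3339};
    use time::OffsetDateTime;

    fn main() -> Result<(), time::error::Format> {
        let now = OffsetDateTime::now_utc();
        // Iso8601 is generic over a const-encoded configuration, hence ::DEFAULT.
        println!("{}", now.format(&Iso8601::DEFAULT)?);
        // Rfc3339 is a unit struct: pass the value itself, it has no DEFAULT.
        println!("{}", now.format(&Rfc3339)?);
        Ok(())
    }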