diff --git a/.cargo/config.toml b/.cargo/config.toml
index 35049cbcb..e11d56a31 100644
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -1,2 +1,2 @@
 [alias]
-xtask = "run --package xtask --"
+xtask = "run --release --package xtask --"
diff --git a/.github/workflows/bench-manual.yml b/.github/workflows/bench-manual.yml
new file mode 100644
index 000000000..6d8c3a006
--- /dev/null
+++ b/.github/workflows/bench-manual.yml
@@ -0,0 +1,30 @@
+name: Bench (manual)
+
+on:
+  workflow_dispatch:
+    inputs:
+      workload:
+        description: 'The path to the workloads to execute (workloads/...)'
+        required: true
+        default: 'workloads/movies.json'
+
+env:
+  WORKLOAD_NAME: ${{ github.event.inputs.workload }}
+
+jobs:
+  benchmarks:
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 180 # 3h
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
+        run: |
+          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Manual [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- ${WORKLOAD_NAME}
+
diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml
new file mode 100644
index 000000000..6f4956542
--- /dev/null
+++ b/.github/workflows/bench-pr.yml
@@ -0,0 +1,46 @@
+name: Bench (PR)
+on:
+  issue_comment:
+    types: [created]
+
+permissions:
+  issues: write
+
+env:
+  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+
+jobs:
+  run-benchmarks-on-comment:
+    if: startsWith(github.event.comment.body, '/bench')
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 180 # 3h
+    steps:
+      - name: Check for Command
+        id: command
+        uses: xt0rted/slash-command-action@v2
+        with:
+          command: bench
+          reaction-type: "rocket"
+          repo-token: ${{ env.GH_TOKEN }}
+
+      - uses: xt0rted/pull-request-comment-branch@v2
+        id: comment-branch
+        with:
+          repo_token: ${{ env.GH_TOKEN }}
+
+      - uses: actions/checkout@v3
+        if: success()
+        with:
+          fetch-depth: 0 # fetch full history to be able to get main commit sha
+          ref: ${{ steps.comment-branch.outputs.head_ref }}
+
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Run benchmarks on PR ${{ github.event.issue.id }}
+        run: |
+          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.url }}) on [#${{github.event.issue.id}}](${{ github.event.issue.url }})" -- ${{ steps.command.outputs.command-arguments }}
\ No newline at end of file
diff --git a/.github/workflows/bench-push-indexing.yml b/.github/workflows/bench-push-indexing.yml
new file mode 100644
index 000000000..fd0f19a5a
--- /dev/null
+++ b/.github/workflows/bench-push-indexing.yml
@@ -0,0 +1,25 @@
+name: Indexing bench (push)
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  benchmarks:
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 180 # 3h
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      # Run benchmarks
+      - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
+        run: |
+          cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Push on \`main\` [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- workloads/*.json
+
diff --git a/.gitignore b/.gitignore
index 5f660c735..e00f45c1e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,8 @@
 /data.ms
 /snapshots
 /dumps
+/bench
+/_xtask_benchmark.ms

 # Snapshots
 ## ... large
diff --git a/Cargo.lock b/Cargo.lock
index 971ab602a..700bb2653 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -356,9 +356,9 @@ dependencies = [

 [[package]]
 name = "anyhow"
-version = "1.0.79"
+version = "1.0.80"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca"
+checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
 dependencies = [
  "backtrace",
 ]
@@ -440,6 +440,12 @@ dependencies = [
  "syn 2.0.48",
 ]

+[[package]]
+name = "atomic"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba"
+
 [[package]]
 name = "atomic-polyfill"
 version = "0.1.11"
@@ -622,6 +628,15 @@ dependencies = [
  "serde",
 ]

+[[package]]
+name = "build-info"
+version = "1.7.0"
+dependencies = [
+ "anyhow",
+ "time",
+ "vergen-git2",
+]
+
 [[package]]
 name = "bumpalo"
 version = "3.13.0"
@@ -1342,7 +1357,16 @@
 version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8"
 dependencies = [
- "derive_builder_macro",
+ "derive_builder_macro 0.12.0",
+]
+
+[[package]]
+name = "derive_builder"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f59169f400d8087f238c5c0c7db6a28af18681717f3b623227d92f397e938c7"
+dependencies = [
+ "derive_builder_macro 0.13.1",
 ]
@@ -1357,13 +1381,35 @@ dependencies = [
  "syn 1.0.109",
 ]

+[[package]]
+name = "derive_builder_core"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4ec317cc3e7ef0928b0ca6e4a634a4d6c001672ae210438cf114a83e56b018d"
+dependencies = [
+ "darling 0.14.4",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "derive_builder_macro"
 version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e"
 dependencies = [
- "derive_builder_core",
+ "derive_builder_core 0.12.0",
+ "syn 1.0.109",
+]
+
+[[package]]
+name = "derive_builder_macro"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "870368c3fb35b8031abb378861d4460f573b92238ec2152c927a21f77e3e0127"
+dependencies = [
+ "derive_builder_core 0.13.1",
  "syn 1.0.109",
 ]
@@ -2082,11 +2128,11 @@ checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"

 [[package]]
 name = "git2"
-version = "0.16.1"
+version = "0.18.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ccf7f68c2995f392c49fffb4f95ae2c873297830eb25c6bc4c114ce8f4562acc"
+checksum = "1b3ba52851e73b46a4c3df1d89343741112003f0f6f13beb0dfac9e457c3fdcd"
 dependencies = [
- "bitflags 1.3.2",
+ "bitflags 2.4.1",
  "libc",
  "libgit2-sys",
  "log",
@@ -2383,7 +2429,7 @@ dependencies = [
  "bincode",
  "crossbeam",
  "csv",
- "derive_builder",
+ "derive_builder 0.12.0",
  "dump",
  "enum-iterator",
  "file-store",
@@ -2500,7 +2546,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455"
 dependencies = [
  "hermit-abi",
- "rustix 0.38.26",
+ "rustix 0.38.31",
  "windows-sys 0.52.0",
 ]
@@ -2622,15 +2668,15 @@ dependencies = [

 [[package]]
 name = "libc"
-version = "0.2.150"
+version = "0.2.153"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
+checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"

 [[package]]
 name = "libgit2-sys"
-version = "0.14.2+1.5.1"
+version = "0.16.2+1.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
+checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8"
 dependencies = [
  "cc",
  "libc",
@@ -2677,9 +2723,9 @@ dependencies = [

 [[package]]
 name = "libz-sys"
-version = "1.1.12"
+version = "1.1.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b"
+checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6"
 dependencies = [
  "cc",
  "libc",
@@ -3116,6 +3162,7 @@ dependencies = [
  "async-trait",
  "brotli",
  "bstr",
+ "build-info",
  "byte-unit",
  "bytes",
  "cargo_toml",
@@ -3187,7 +3234,6 @@ dependencies = [
  "url",
  "urlencoding",
  "uuid",
- "vergen",
  "walkdir",
  "yaup",
  "zip",
@@ -3488,6 +3534,12 @@ dependencies = [
  "num-traits",
 ]

+[[package]]
+name = "num-conv"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
+
 [[package]]
 name = "num-integer"
 version = "0.1.45"
@@ -3518,6 +3570,15 @@ dependencies = [
  "libc",
 ]

+[[package]]
+name = "num_threads"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "number_prefix"
 version = "0.4.0"
@@ -4132,15 +4193,6 @@ dependencies = [
  "bitflags 1.3.2",
 ]

-[[package]]
-name = "redox_syscall"
-version = "0.4.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
-dependencies = [
- "bitflags 1.3.2",
-]
-
 [[package]]
 name = "redox_users"
 version = "0.4.3"
@@ -4218,10 +4270,12 @@ dependencies = [
  "system-configuration",
  "tokio",
  "tokio-rustls 0.24.1",
+ "tokio-util",
  "tower-service",
  "url",
  "wasm-bindgen",
  "wasm-bindgen-futures",
+ "wasm-streams",
  "web-sys",
  "webpki-roots 0.25.3",
  "winreg",
@@ -4329,9 +4383,9 @@ dependencies = [

 [[package]]
 name = "rustix"
-version = "0.38.26"
+version = "0.38.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a"
+checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949"
 dependencies = [
  "bitflags 2.4.1",
  "errno",
@@ -4867,14 +4921,13 @@ dependencies = [

 [[package]]
 name = "tempfile"
-version = "3.9.0"
+version = "3.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa"
+checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1"
 dependencies = [
  "cfg-if",
  "fastrand",
- "redox_syscall 0.4.1",
- "rustix 0.38.26",
+ "rustix 0.38.31",
  "windows-sys 0.52.0",
 ]
@@ -4934,12 +4987,15 @@ dependencies = [

 [[package]]
 name = "time"
-version = "0.3.31"
+version = "0.3.34"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e"
+checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749"
 dependencies = [
  "deranged",
  "itoa",
+ "libc",
+ "num-conv",
+ "num_threads",
  "powerfmt",
  "serde",
  "time-core",
@@ -4954,10 +5010,11 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"

 [[package]]
 name = "time-macros"
-version = "0.2.16"
+version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f"
+checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774"
 dependencies = [
+ "num-conv",
  "time-core",
 ]
@@ -4992,7 +5049,7 @@ version = "0.14.1"
 source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.14.1#6357206cdcce4d78ffb1e0372feb456caea09375"
 dependencies = [
  "aho-corasick",
- "derive_builder",
+ "derive_builder 0.12.0",
  "esaxx-rs",
  "getrandom",
  "itertools 0.11.0",
@@ -5395,10 +5452,11 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"

 [[package]]
 name = "uuid"
-version = "1.6.1"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560"
+checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a"
 dependencies = [
+ "atomic",
  "getrandom",
  "serde",
 ]
@@ -5417,18 +5475,42 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"

 [[package]]
 name = "vergen"
-version = "7.5.1"
+version = "9.0.0-beta.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f21b881cd6636ece9735721cf03c1fe1e774fe258683d084bb2812ab67435749"
+checksum = "107dc53b443fe8cc380798abb75ad6b7038281165109afea1f1b28bb47047ed5"
 dependencies = [
  "anyhow",
- "cfg-if",
- "enum-iterator",
  "getset",
+ "rustversion",
+ "vergen-lib",
+]
+
+[[package]]
+name = "vergen-git2"
+version = "1.0.0-beta.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8875c5d71074bb67118774e3d795ab6fe77c3ae3161cb54e19104cabc49487f1"
+dependencies = [
+ "anyhow",
+ "derive_builder 0.13.1",
  "git2",
  "rustversion",
- "thiserror",
  "time",
+ "vergen",
+ "vergen-lib",
+]
+
+[[package]]
+name = "vergen-lib"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26ebfba72ba904559f25f41ea1512335b5a46459084258cea0857549d9645187"
+dependencies = [
+ "anyhow",
+ "derive_builder 0.13.1",
+ "getset",
+ "rustversion",
 ]
@@ -5539,6 +5621,19 @@ version = "0.2.87"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"

+[[package]]
+name = "wasm-streams"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7"
+dependencies = [
+ "futures-util",
+ "js-sys",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+]
+
 [[package]]
 name = "wav"
 version = "1.0.0"
@@ -5843,9 +5938,9 @@ checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"

 [[package]]
 name = "winnow"
-version = "0.5.4"
+version = "0.5.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "acaaa1190073b2b101e15083c38ee8ec891b5e05cbee516521e94ec008f61e64"
+checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
 dependencies = [
  "memchr",
 ]
@@ -5873,8 +5968,23 @@ dependencies = [
 name = "xtask"
 version = "1.7.0"
 dependencies = [
+ "anyhow",
+ "build-info",
  "cargo_metadata",
  "clap",
+ "futures-core",
+ "futures-util",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "sha2",
+ "sysinfo",
+ "time",
+ "tokio",
+ "tracing",
+ "tracing-subscriber",
+ "tracing-trace",
+ "uuid",
 ]

 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 11190025a..1d79fd196 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,7 +17,7 @@ members = [
     "benchmarks",
     "fuzzers",
     "tracing-trace",
-    "xtask",
+    "xtask", "build-info",
 ]

 [workspace.package]
diff --git a/Dockerfile b/Dockerfile
index dd2cfc134..5b227e6fc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,7 +8,7 @@ WORKDIR /
 ARG COMMIT_SHA
 ARG COMMIT_DATE
 ARG GIT_TAG
-ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
+ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
 ENV RUSTFLAGS="-C target-feature=-crt-static"
 COPY . .
diff --git a/build-info/Cargo.toml b/build-info/Cargo.toml
new file mode 100644
index 000000000..50854a642
--- /dev/null
+++ b/build-info/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "build-info"
+version.workspace = true
+authors.workspace = true
+description.workspace = true
+homepage.workspace = true
+readme.workspace = true
+edition.workspace = true
+license.workspace = true
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+time = { version = "0.3.34", features = ["parsing"] }
+
+[build-dependencies]
+anyhow = "1.0.80"
+vergen-git2 = "1.0.0-beta.2"
diff --git a/build-info/build.rs b/build-info/build.rs
new file mode 100644
index 000000000..b1ec0ab47
--- /dev/null
+++ b/build-info/build.rs
@@ -0,0 +1,22 @@
+fn main() {
+    if let Err(err) = emit_git_variables() {
+        println!("cargo:warning=vergen: {}", err);
+    }
+}
+
+fn emit_git_variables() -> anyhow::Result<()> {
+    // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
+    // in the corresponding GitHub workflow (publish_docker.yml).
+    // This is due to the Dockerfile building the binary outside of the git directory.
+    let mut builder = vergen_git2::Git2Builder::default();
+
+    builder.branch(true);
+    builder.commit_timestamp(true);
+    builder.commit_message(true);
+    builder.describe(true, true, None);
+    builder.sha(false);
+
+    let git2 = builder.build()?;
+
+    vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
+}
diff --git a/build-info/src/lib.rs b/build-info/src/lib.rs
new file mode 100644
index 000000000..cfcefb4a2
--- /dev/null
+++ b/build-info/src/lib.rs
@@ -0,0 +1,203 @@
+use time::format_description::well_known::Iso8601;
+
+#[derive(Debug, Clone)]
+pub struct BuildInfo {
+    pub branch: Option<&'static str>,
+    pub describe: Option<DescribeResult>,
+    pub commit_sha1: Option<&'static str>,
+    pub commit_msg: Option<&'static str>,
+    pub commit_timestamp: Option<time::OffsetDateTime>,
+}
+
+impl BuildInfo {
+    pub fn from_build() -> Self {
+        let branch: Option<&'static str> = option_env!("VERGEN_GIT_BRANCH");
+        let describe = DescribeResult::from_build();
+        let commit_sha1 = option_env!("VERGEN_GIT_SHA");
+        let commit_msg = option_env!("VERGEN_GIT_COMMIT_MESSAGE");
+        let commit_timestamp = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP");
+
+        let commit_timestamp = commit_timestamp.and_then(|commit_timestamp| {
+            time::OffsetDateTime::parse(commit_timestamp, &Iso8601::DEFAULT).ok()
+        });
+
+        Self { branch, describe, commit_sha1, commit_msg, commit_timestamp }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum DescribeResult {
+    Prototype { name: &'static str },
+    Release { version: &'static str, major: u64, minor: u64, patch: u64 },
+    Prerelease { version: &'static str, major: u64, minor: u64, patch: u64, rc: u64 },
+    NotATag { describe: &'static str },
+}
+
+impl DescribeResult {
+    pub fn new(describe: &'static str) -> Self {
+        if let Some(name) = prototype_name(describe) {
+            Self::Prototype { name }
+        } else if let Some(release) = release_version(describe) {
+            release
+        } else if let Some(prerelease) = prerelease_version(describe) {
+            prerelease
+        } else {
+            Self::NotATag { describe }
+        }
+    }
+
+    pub fn from_build() -> Option<Self> {
+        let describe: &'static str = option_env!("VERGEN_GIT_DESCRIBE")?;
+        Some(Self::new(describe))
+    }
+
+    pub fn as_tag(&self) -> Option<&'static str> {
+        match self {
+            DescribeResult::Prototype { name } => Some(name),
+            DescribeResult::Release { version, .. } => Some(version),
+            DescribeResult::Prerelease { version, .. } => Some(version),
+            DescribeResult::NotATag { describe: _ } => None,
+        }
+    }
+
+    pub fn as_prototype(&self) -> Option<&'static str> {
+        match self {
+            DescribeResult::Prototype { name } => Some(name),
+            DescribeResult::Release { .. }
+            | DescribeResult::Prerelease { .. }
+            | DescribeResult::NotATag { .. } => None,
+        }
+    }
+}
+
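+// Editor's illustration, not part of the original patch: a hypothetical caller
+// of `DescribeResult`, e.g. for rendering a version banner.
+#[allow(dead_code)]
+fn version_banner() -> String {
+    match DescribeResult::from_build() {
+        Some(DescribeResult::Prototype { name }) => format!("prototype {name}"),
+        Some(DescribeResult::Release { version, .. })
+        | Some(DescribeResult::Prerelease { version, .. }) => version.to_string(),
+        Some(DescribeResult::NotATag { describe }) => format!("commit {describe}"),
+        None => env!("CARGO_PKG_VERSION").to_string(),
+    }
+}
+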
+/// Parses the input as a prototype name.
+///
+/// Returns `Some(prototype_name)` if the following conditions are met on this value:
+///
+/// 1. starts with `prototype-`,
+/// 2. ends with `-<some_number>`,
+/// 3. does not end with `-<some_number>-<some_number>`.
+///
+/// Otherwise, returns `None`.
+fn prototype_name(describe: &'static str) -> Option<&'static str> {
+    if !describe.starts_with("prototype-") {
+        return None;
+    }
+
+    let mut rsplit_prototype = describe.rsplit('-');
+    // last component MUST be a number
+    rsplit_prototype.next()?.parse::<u64>().ok()?;
+    // the component before the last SHALL NOT be a number
+    rsplit_prototype.next()?.parse::<u64>().err()?;
+
+    Some(describe)
+}
+
+fn release_version(describe: &'static str) -> Option<DescribeResult> {
+    if !describe.starts_with('v') {
+        return None;
+    }
+
+    // full release versions don't contain a `-`
+    if describe.contains('-') {
+        return None;
+    }
+
+    // full release versions parse as vX.Y.Z, with X, Y, Z numbers.
+    let mut dots = describe[1..].split('.');
+    let major: u64 = dots.next()?.parse().ok()?;
+    let minor: u64 = dots.next()?.parse().ok()?;
+    let patch: u64 = dots.next()?.parse().ok()?;
+
+    if dots.next().is_some() {
+        return None;
+    }
+
+    Some(DescribeResult::Release { version: describe, major, minor, patch })
+}
+
+fn prerelease_version(describe: &'static str) -> Option<DescribeResult> {
+    // a prerelease version is in the shape vM.N.P-rc.C
+    let mut hyphen = describe.rsplit('-');
+    let prerelease = hyphen.next()?;
+    if !prerelease.starts_with("rc.") {
+        return None;
+    }
+
+    let rc: u64 = prerelease[3..].parse().ok()?;
+
+    let release = hyphen.next()?;
+
+    let DescribeResult::Release { version: _, major, minor, patch } = release_version(release)?
+    else {
+        return None;
+    };
+
+    Some(DescribeResult::Prerelease { version: describe, major, minor, patch, rc })
+}
+
+#[cfg(test)]
+mod test {
+    use super::DescribeResult;
+
+    fn assert_not_a_tag(describe: &'static str) {
+        assert_eq!(DescribeResult::NotATag { describe }, DescribeResult::new(describe))
+    }
+
+    fn assert_proto(describe: &'static str) {
+        assert_eq!(DescribeResult::Prototype { name: describe }, DescribeResult::new(describe))
+    }
+
+    fn assert_release(describe: &'static str, major: u64, minor: u64, patch: u64) {
+        assert_eq!(
+            DescribeResult::Release { version: describe, major, minor, patch },
+            DescribeResult::new(describe)
+        )
+    }
+
+    fn assert_prerelease(describe: &'static str, major: u64, minor: u64, patch: u64, rc: u64) {
+        assert_eq!(
+            DescribeResult::Prerelease { version: describe, major, minor, patch, rc },
+            DescribeResult::new(describe)
+        )
+    }
+
+    #[test]
+    fn not_a_tag() {
+        assert_not_a_tag("whatever-fuzzy");
+        assert_not_a_tag("whatever-fuzzy-5-ggg-dirty");
+        assert_not_a_tag("whatever-fuzzy-120-ggg-dirty");
+
+        // technically a tag, but not a proto nor a version, so not parsed as a tag
+        assert_not_a_tag("whatever");
+
+        // dirty version
+        assert_not_a_tag("v1.7.0-1-ggga-dirty");
+        assert_not_a_tag("v1.7.0-rc.1-1-ggga-dirty");
+
+        // after version
+        assert_not_a_tag("v1.7.0-1-ggga");
+        assert_not_a_tag("v1.7.0-rc.1-1-ggga");
+
+        // after proto
+        assert_not_a_tag("prototype-tag-0-1-ggga");
+        assert_not_a_tag("prototype-tag-0-1-ggga-dirty");
+    }
+
+    #[test]
+    fn prototype() {
+        assert_proto("prototype-tag-0");
+        assert_proto("prototype-tag-10");
+        assert_proto("prototype-long-name-tag-10");
+    }
+
+    #[test]
+    fn release() {
+        assert_release("v1.7.2", 1, 7, 2);
+    }
+
+    #[test]
+    fn prerelease() {
+        assert_prerelease("v1.7.2-rc.3", 1, 7, 2, 3);
+    }
+}
diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml
index fc4f5aa8b..b65c466ca 100644
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -107,6 +107,7 @@ tracing = "0.1.40"
 tracing-subscriber = { version = "0.3.18", features = ["json"] }
 tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
 tracing-actix-web = "0.7.9"
+build-info = { version = "1.7.0", path = "../build-info" }

 [dev-dependencies]
 actix-rt = "2.9.0"
@@ -131,7 +132,6 @@ reqwest = { version = "0.11.23", features = [
 sha-1 = { version = "0.10.1", optional = true }
 static-files = { version = "0.2.3", optional = true }
 tempfile = { version = "3.9.0", optional = true }
-vergen = { version = "7.5.1", default-features = false, features = ["git"] }
 zip = { version = "0.6.6", optional = true }

 [features]
diff --git a/meilisearch/build.rs b/meilisearch/build.rs
index c839b6e33..dc24b0449 100644
--- a/meilisearch/build.rs
+++ b/meilisearch/build.rs
@@ -1,17 +1,4 @@
-use vergen::{vergen, Config, SemverKind};
-
 fn main() {
-    // Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
-    // in the corresponding GitHub workflow (publish_docker.yml).
-    // This is due to the Dockerfile building the binary outside of the git directory.
-    let mut config = Config::default();
-    // allow using non-annotated tags
-    *config.git_mut().semver_kind_mut() = SemverKind::Lightweight;
-
-    if let Err(e) = vergen(config) {
-        println!("cargo:warning=vergen: {}", e);
-    }
-
     #[cfg(feature = "mini-dashboard")]
     mini_dashboard::setup_mini_dashboard().expect("Could not load the mini-dashboard assets");
 }
diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs
index 55ddb4747..7dfc52900 100644
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -473,7 +473,9 @@ impl Segment {
             create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
         {
             // Replace the version number with the prototype name if any.
-            let version = if let Some(prototype) = crate::prototype_name() {
+            let version = if let Some(prototype) = build_info::DescribeResult::from_build()
+                .and_then(|describe| describe.as_prototype())
+            {
                 prototype
             } else {
                 env!("CARGO_PKG_VERSION")
diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs
index 9d9274b9d..820f1ae42 100644
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -536,30 +536,3 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
 pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
     config.service(web::resource("/").route(web::get().to(routes::running)));
 }
-
-/// Parses the output of
-/// [`VERGEN_GIT_SEMVER_LIGHTWEIGHT`](https://docs.rs/vergen/latest/vergen/struct.Git.html#instructions)
-/// as a prototype name.
-///
-/// Returns `Some(prototype_name)` if the following conditions are met on this value:
-///
-/// 1. starts with `prototype-`,
-/// 2. ends with `-<some_number>`,
-/// 3. does not end with `-<some_number>-<some_number>`.
-///
-/// Otherwise, returns `None`.
-pub fn prototype_name() -> Option<&'static str> {
-    let prototype: &'static str = option_env!("VERGEN_GIT_SEMVER_LIGHTWEIGHT")?;
-
-    if !prototype.starts_with("prototype-") {
-        return None;
-    }
-
-    let mut rsplit_prototype = prototype.rsplit('-');
-    // last component MUST be a number
-    rsplit_prototype.next()?.parse::<u64>().ok()?;
-    // before than last component SHALL NOT be a number
-    rsplit_prototype.next()?.parse::<u64>().err()?;
-
-    Some(prototype)
-}
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index f1f93dd99..3451325b2 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -12,8 +12,8 @@ use is_terminal::IsTerminal;
 use meilisearch::analytics::Analytics;
 use meilisearch::option::LogMode;
 use meilisearch::{
-    analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType,
-    LogStderrHandle, LogStderrType, Opt, SubscriberForSecondLayer,
+    analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
+    LogStderrType, Opt, SubscriberForSecondLayer,
 };
 use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
 use mimalloc::MiMalloc;
@@ -163,8 +163,8 @@ pub fn print_launch_resume(
     analytics: Arc<dyn Analytics>,
     config_read_from: Option<PathBuf>,
 ) {
-    let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
-    let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
+    let build_info = build_info::BuildInfo::from_build();
+
     let protocol =
         if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" };
     let ascii_name = r#"
@@ -189,10 +189,18 @@ pub fn print_launch_resume(
     eprintln!("Database path:\t\t{:?}", opt.db_path);
     eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
     eprintln!("Environment:\t\t{:?}", opt.env);
-    eprintln!("Commit SHA:\t\t{:?}", commit_sha.to_string());
-    eprintln!("Commit date:\t\t{:?}", commit_date.to_string());
+    eprintln!("Commit SHA:\t\t{:?}", build_info.commit_sha1.unwrap_or("unknown"));
+    eprintln!(
+        "Commit date:\t\t{:?}",
+        build_info
+            .commit_timestamp
+            .and_then(|commit_timestamp| commit_timestamp
+                .format(&time::format_description::well_known::Rfc3339)
+                .ok())
+            .unwrap_or("unknown".into())
+    );
     eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string());

-    if let Some(prototype) = prototype_name() {
+    if let Some(prototype) = build_info.describe.and_then(|describe| describe.as_prototype()) {
         eprintln!("Prototype:\t\t{:?}", prototype);
     }
diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs
index 249103e12..1c1465582 100644
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@@ -359,12 +359,18 @@ async fn get_version(
 ) -> HttpResponse {
     analytics.publish("Version Seen".to_string(), json!(null), Some(&req));

-    let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
-    let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
+    let build_info = build_info::BuildInfo::from_build();

     HttpResponse::Ok().json(VersionResponse {
-        commit_sha: commit_sha.to_string(),
-        commit_date: commit_date.to_string(),
+        commit_sha: build_info.commit_sha1.unwrap_or("unknown").to_string(),
+        commit_date: build_info
+            .commit_timestamp
+            .and_then(|commit_timestamp| {
+                commit_timestamp
+                    .format(&time::format_description::well_known::Iso8601::DEFAULT)
+                    .ok()
+            })
+            .unwrap_or("unknown".into()),
         pkg_version: env!("CARGO_PKG_VERSION").to_string(),
     })
 }
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 251a2db99..43f3f4947 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -210,8 +210,7 @@ fn run_extraction_task(
     let current_span = tracing::Span::current();

     rayon::spawn(move || {
-        let child_span =
-            tracing::trace_span!(target: "", parent: &current_span, "extract_multiple_chunks");
+        let child_span = tracing::trace_span!(target: "indexing::extract::details", parent: &current_span, "extract_multiple_chunks");
         let _entered = child_span.enter();
         puffin::profile_scope!("extract_multiple_chunks", name);
         match extract_fn(chunk, indexer) {
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 61ca1a024..7499b68e5 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -284,7 +284,7 @@ where
     #[tracing::instrument(
         level = "trace",
         skip_all,
-        target = "profile::indexing::details",
+        target = "indexing::details",
         name = "index_documents_raw"
     )]
     pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 1fea9a70f..6aad290e5 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -473,7 +473,7 @@ pub(crate) fn write_typed_chunk_into_index(
             is_merged_database = true;
         }
         TypedChunk::FieldIdFacetIsEmptyDocids(_) => {
-            let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids");
+            let span = tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_empty_docids");
             let _entered = span.enter();

             let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);
diff --git a/tracing-trace/src/processor/span_stats.rs b/tracing-trace/src/processor/span_stats.rs
index f3e6238ff..584fe53f8 100644
--- a/tracing-trace/src/processor/span_stats.rs
+++ b/tracing-trace/src/processor/span_stats.rs
@@ -1,4 +1,5 @@
 use std::collections::{BTreeMap, HashMap};
+use std::ops::Range;
 use std::time::Duration;

 use serde::{Deserialize, Serialize};
@@ -16,6 +17,51 @@ enum SpanStatus {
 pub struct CallStats {
     pub call_count: usize,
     pub time: u64,
+    pub self_time: u64,
+}
+
+#[derive(Debug, Default)]
+pub struct SelfTime {
+    child_ranges: Vec<Range<Duration>>,
+}
+
+impl SelfTime {
+    pub fn new() -> Self {
+        Default::default()
+    }
+
+    pub fn add_child_range(&mut self, child_range: Range<Duration>) {
+        self.child_ranges.push(child_range)
+    }
+
+    pub fn self_duration(&mut self, self_range: Range<Duration>) -> Duration {
+        if self.child_ranges.is_empty() {
+            return self_range.end - self_range.start;
+        }
+
+        // by sorting child ranges by their start time,
+        // we make sure that no child will start before the last one we visited.
+        self.child_ranges
+            .sort_by(|left, right| left.start.cmp(&right.start).then(left.end.cmp(&right.end)));
+        // self duration computed by adding all the segments where the span is not executing a child
+        let mut self_duration = Duration::from_nanos(0);
+
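+        // Editorial worked example (illustrative, not in the original patch):
+        // with `self_range` 0..10ns and sorted `child_ranges` [1..3, 2..5, 7..8],
+        // the sweep below adds 0..1 (1ns) and 5..7 (2ns); the 1..3/2..5 overlap
+        // is not double-counted because `committed_point` only moves forward.
+        // Note that the tail 8..10, after the last child ends, is not added.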
+        // last point in time where we are certain that this span was not executing a child.
+        let mut committed_point = self_range.start;
+
+        for child_range in &self.child_ranges {
+            if child_range.start > committed_point {
+                // we add to the self duration the gap between the end of the latest child and the beginning of the next one
+                self_duration += child_range.start - committed_point;
+            }
+            if committed_point < child_range.end {
+                // then we set ourselves to the end of the latest child
+                committed_point = child_range.end;
+            }
+        }
+
+        self_duration
+    }
 }

 pub fn to_call_stats(
@@ -23,6 +69,9 @@ ) -> Result<BTreeMap<String, CallStats>, Error> {
     let mut calls = HashMap::new();
     let mut spans = HashMap::new();
+    let mut last_point = Duration::from_nanos(0);
+    let mut first_point = None;
+    let mut total_self_time = SelfTime::new();
     for entry in trace {
         let entry = entry?;
         match entry {
@@ -31,10 +80,11 @@
             }
             Entry::NewThread(_) => {}
             Entry::NewSpan(span) => {
-                spans.insert(span.id, (span, SpanStatus::Outside));
+                spans.insert(span.id, (span, SpanStatus::Outside, SelfTime::new()));
             }
             Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => {
-                let (_, status) = spans.get_mut(&id).unwrap();
+                first_point.get_or_insert(time);
+                let (_, status, _) = spans.get_mut(&id).unwrap();
                 let SpanStatus::Outside = status else {
                     continue;
                 };
@@ -43,18 +93,32 @@
                 *status = SpanStatus::Inside(time);
             }
             Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => {
-                let (span, status) = spans.get_mut(&id).unwrap();
+                let (span, status, self_time) = spans.get_mut(&id).unwrap();
                 let SpanStatus::Inside(begin) = status else {
                     continue;
                 };
                 let begin = *begin;

+                if last_point < end {
+                    last_point = end;
+                }
+
                 *status = SpanStatus::Outside;

+                let self_range = begin..end;
+
+                let self_duration = self_time.self_duration(self_range.clone());
+                *self_time = SelfTime::new();
+
                 let span = *span;
+                if let Some(parent_id) = span.parent_id {
+                    let (_, _, parent_self_time) = spans.get_mut(&parent_id).unwrap();
+                    parent_self_time.add_child_range(self_range.clone())
+                }
+                total_self_time.add_child_range(self_range);
                 let (_, call_list) = calls.get_mut(&span.call_id).unwrap();
-                call_list.push(end - begin);
+                call_list.push((end - begin, self_duration));
             }
             Entry::SpanClose(SpanClose { id, time: _ }) => {
                 spans.remove(&id);
@@ -63,17 +127,31 @@
         }
     }

+    let total_self_time = first_point
+        .map(|first_point| (first_point, total_self_time.self_duration(first_point..last_point)));
+
     Ok(calls
         .into_iter()
         .map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls)))
+        .chain(total_self_time.map(|(first_point, total_self_time)| {
+            (
+                "::meta::total".to_string(),
+                CallStats {
+                    call_count: 1,
+                    time: (last_point - first_point).as_nanos() as u64,
+                    self_time: total_self_time.as_nanos() as u64,
+                },
+            )
+        }))
         .collect())
 }

 fn site_to_string(call_site: NewCallsite) -> String {
     format!("{}::{}", call_site.target, call_site.name)
 }

-fn calls_to_stats(calls: Vec<Duration>) -> CallStats {
+fn calls_to_stats(calls: Vec<(Duration, Duration)>) -> CallStats {
     let nb = calls.len();
-    let sum: Duration = calls.iter().sum();
-    CallStats { call_count: nb, time: sum.as_nanos() as u64 }
+    let sum: Duration = calls.iter().map(|(total, _)| total).sum();
+    let self_sum: Duration = calls.iter().map(|(_, self_duration)| self_duration).sum();
+    CallStats { call_count: nb, time: sum.as_nanos() as u64, self_time: self_sum.as_nanos() as u64 }
 }
diff --git a/workloads/hackernews.json b/workloads/hackernews.json
new file mode 100644
index 000000000..0a99b69ff
--- /dev/null
+++ b/workloads/hackernews.json
@@ -0,0 +1,164 @@
+{
+    "name": "hackernews.ndjson_1M",
+    "run_count": 3,
+    "extra_cli_args": [],
+    "assets": {
+        "hackernews-100_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson",
+            "sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213"
+        },
+        "hackernews-200_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson",
+            "sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685"
+        },
+        "hackernews-300_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson",
+            "sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2"
+        },
+        "hackernews-400_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson",
+            "sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7"
+        },
+        "hackernews-500_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson",
+            "sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083"
+        },
+        "hackernews-600_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson",
+            "sha256": "b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe"
+        },
+        "hackernews-700_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson",
+            "sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b"
+        },
+        "hackernews-800_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson",
+            "sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546"
+        },
+        "hackernews-900_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson",
+            "sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9"
+        },
+        "hackernews-1_000_000.ndjson": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson",
+            "sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
+        }
+    },
+    "commands": [
+        {
+            "route": "indexes/movies/settings",
+            "method": "PATCH",
+            "body": {
+                "inline": {
+                    "displayedAttributes": [
+                        "title",
+                        "by",
+                        "score",
+                        "time"
+                    ],
+                    "searchableAttributes": [
+                        "title"
+                    ],
+                    "filterableAttributes": [
+                        "by"
+                    ],
+                    "sortableAttributes": [
+                        "score",
+                        "time"
+                    ]
+                }
+            },
+            "synchronous": "DontWait"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-100_000.ndjson"
+            },
+            "synchronous": "WaitForTask"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-200_000.ndjson"
+            },
+            "synchronous": "WaitForResponse"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-300_000.ndjson"
+            },
+            "synchronous": "WaitForResponse"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-400_000.ndjson"
+            },
+            "synchronous": "WaitForResponse"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-500_000.ndjson"
+            },
+            "synchronous": "WaitForResponse"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-600_000.ndjson"
+            },
+            "synchronous": "WaitForResponse"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-700_000.ndjson"
+            },
+            "synchronous": "WaitForResponse"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-800_000.ndjson"
+            },
+            "synchronous": "WaitForResponse"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-900_000.ndjson"
+            },
+            "synchronous": "WaitForResponse"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "hackernews-1_000_000.ndjson"
+            },
+            "synchronous": "WaitForTask"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/workloads/movies-nothreads.json b/workloads/movies-nothreads.json
new file mode 100644
index 000000000..175daacf9
--- /dev/null
+++ b/workloads/movies-nothreads.json
@@ -0,0 +1,44 @@
+{
+    "name": "movies.json,no-threads",
+    "run_count": 2,
+    "extra_cli_args": [
+        "--max-indexing-threads=1"
+    ],
+    "assets": {
+        "movies.json": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
+            "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
+        }
+    },
+    "commands": [
+        {
+            "route": "indexes/movies/settings",
+            "method": "PATCH",
+            "body": {
+                "inline": {
+                    "searchableAttributes": [
+                        "title",
+                        "overview"
+                    ],
+                    "filterableAttributes": [
+                        "genres",
+                        "release_date"
+                    ],
+                    "sortableAttributes": [
+                        "release_date"
+                    ]
+                }
+            },
+            "synchronous": "DontWait"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "movies.json"
+            },
+            "synchronous": "WaitForTask"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/workloads/movies.json b/workloads/movies.json
new file mode 100644
index 000000000..445ff3aca
--- /dev/null
+++ b/workloads/movies.json
@@ -0,0 +1,42 @@
+{
+    "name": "movies.json",
+    "run_count": 10,
+    "extra_cli_args": [],
+    "assets": {
+        "movies.json": {
+            "local_location": null,
+            "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
+            "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
+        }
+    },
+    "commands": [
+        {
+            "route": "indexes/movies/settings",
+            "method": "PATCH",
+            "body": {
+                "inline": {
+                    "searchableAttributes": [
+                        "title",
+                        "overview"
+                    ],
+                    "filterableAttributes": [
+                        "genres",
+                        "release_date"
+                    ],
+                    "sortableAttributes": [
+                        "release_date"
+                    ]
+                }
+            },
+            "synchronous": "DontWait"
+        },
+        {
+            "route": "indexes/movies/documents",
+            "method": "POST",
+            "body": {
+                "asset": "movies.json"
+            },
+            "synchronous": "WaitForTask"
+        }
+    ]
+}
[dependencies] +anyhow = "1.0.79" +build-info = { version = "1.7.0", path = "../build-info" } cargo_metadata = "0.18.1" clap = { version = "4.4.14", features = ["derive"] } +futures-core = "0.3.30" +futures-util = "0.3.30" +reqwest = { version = "0.11.23", features = [ + "stream", + "json", + "rustls-tls", +], default_features = false } +serde = { version = "1.0.195", features = ["derive"] } +serde_json = "1.0.111" +sha2 = "0.10.8" +sysinfo = "0.30.5" +time = { version = "0.3.32", features = [ + "serde", + "serde-human-readable", + "macros", +] } +tokio = { version = "1.35.1", features = [ + "rt", + "net", + "time", + "process", + "signal", +] } +tracing = "0.1.40" +tracing-subscriber = "0.3.18" +tracing-trace = { version = "0.1.0", path = "../tracing-trace" } +uuid = { version = "1.7.0", features = ["v7", "serde"] } diff --git a/xtask/src/bench/assets.rs b/xtask/src/bench/assets.rs new file mode 100644 index 000000000..241928dbf --- /dev/null +++ b/xtask/src/bench/assets.rs @@ -0,0 +1,250 @@ +use std::collections::BTreeMap; +use std::io::{Read as _, Seek as _, Write as _}; + +use anyhow::{bail, Context}; +use futures_util::TryStreamExt as _; +use serde::Deserialize; +use sha2::Digest; + +use super::client::Client; + +#[derive(Deserialize, Clone)] +pub struct Asset { + pub local_location: Option, + pub remote_location: Option, + #[serde(default)] + pub format: AssetFormat, + pub sha256: Option, +} + +#[derive(Deserialize, Default, Copy, Clone)] +pub enum AssetFormat { + #[default] + Auto, + Json, + NdJson, + Raw, +} + +impl AssetFormat { + pub fn to_content_type(self, filename: &str) -> &'static str { + match self { + AssetFormat::Auto => Self::auto_detect(filename).to_content_type(filename), + AssetFormat::Json => "application/json", + AssetFormat::NdJson => "application/x-ndjson", + AssetFormat::Raw => "application/octet-stream", + } + } + + fn auto_detect(filename: &str) -> Self { + let path = std::path::Path::new(filename); + match path.extension().and_then(|extension| extension.to_str()) { + Some(extension) if extension.eq_ignore_ascii_case("json") => Self::Json, + Some(extension) if extension.eq_ignore_ascii_case("ndjson") => Self::NdJson, + extension => { + tracing::warn!(asset = filename, ?extension, "asset has format `Auto`, but extension was not recognized. Specify `Raw` format to suppress this warning."); + AssetFormat::Raw + } + } + } +} + +pub fn fetch_asset( + name: &str, + assets: &BTreeMap, + asset_folder: &str, +) -> anyhow::Result<(std::fs::File, AssetFormat)> { + let asset = + assets.get(name).with_context(|| format!("could not find asset with name '{name}'"))?; + let filename = if let Some(local_filename) = &asset.local_location { + local_filename.clone() + } else { + format!("{asset_folder}/{name}") + }; + + Ok(( + std::fs::File::open(&filename) + .with_context(|| format!("could not open asset '{name}' at '{filename}'"))?, + asset.format, + )) +} + +#[tracing::instrument(skip(client, assets), fields(asset_count = assets.len()))] +pub async fn fetch_assets( + client: &Client, + assets: &BTreeMap, + asset_folder: &str, +) -> anyhow::Result<()> { + let mut download_tasks = tokio::task::JoinSet::new(); + for (name, asset) in assets { + // trying local + if let Some(local) = &asset.local_location { + match std::fs::File::open(local) { + Ok(file) => { + if check_sha256(name, asset, file)? 
{ + continue; + } else { + tracing::warn!(asset = name, file = local, "found local resource for asset but hash differed, skipping to asset store"); + } + } + Err(error) => match error.kind() { + std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */ + } + _ => tracing::warn!( + error = &error as &dyn std::error::Error, + "error checking local resource, skipping to asset store" + ), + }, + } + } + + // checking asset store + let store_filename = format!("{}/{}", asset_folder, name); + + match std::fs::File::open(&store_filename) { + Ok(file) => { + if check_sha256(name, asset, file)? { + continue; + } else { + tracing::warn!(asset = name, file = store_filename, "found resource for asset in asset store, but hash differed, skipping to remote method"); + } + } + Err(error) => match error.kind() { + std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */ + } + _ => tracing::warn!( + error = &error as &dyn std::error::Error, + "error checking resource in store, skipping to remote method" + ), + }, + } + + // downloading remote + match &asset.remote_location { + Some(location) => { + std::fs::create_dir_all(asset_folder).with_context(|| format!("could not create asset folder at {asset_folder}"))?; + download_tasks.spawn({ + let client = client.clone(); + let name = name.to_string(); + let location = location.to_string(); + let store_filename = store_filename.clone(); + let asset = asset.clone(); + download_asset(client, name, asset, location, store_filename)}); + }, + None => bail!("asset {name} has no remote location, but was not found locally or in the asset store"), + } + } + + while let Some(res) = download_tasks.join_next().await { + res.context("download task panicked")?.context("download task failed")?; + } + + Ok(()) +} + +fn check_sha256(name: &str, asset: &Asset, mut file: std::fs::File) -> anyhow::Result { + let mut bytes = Vec::new(); + file.read_to_end(&mut bytes).with_context(|| format!("hashing file for asset {name}"))?; + let mut file_hash = sha2::Sha256::new(); + file_hash.update(&bytes); + let file_hash = file_hash.finalize(); + let file_hash = format!("{:x}", file_hash); + tracing::debug!(hash = file_hash, "hashed local file"); + + Ok(match &asset.sha256 { + Some(hash) => { + tracing::debug!(hash, "hash from workload"); + if hash.to_ascii_lowercase() == file_hash { + true + } else { + tracing::warn!( + file_hash, + asset_hash = hash.to_ascii_lowercase(), + "hashes don't match" + ); + false + } + } + None => { + tracing::warn!(sha256 = file_hash, "Skipping hash for asset {name} that doesn't have one. 
Please add it to workload file"); + true + } + }) +} + +#[tracing::instrument(skip(client, asset, name), fields(asset = name))] +async fn download_asset( + client: Client, + name: String, + asset: Asset, + src: String, + dest_filename: String, +) -> anyhow::Result<()> { + let context = || format!("failure downloading asset {name} from {src}"); + + let response = client.get(&src).send().await.with_context(context)?; + + let file = std::fs::File::options() + .create(true) + .truncate(true) + .write(true) + .read(true) + .open(&dest_filename) + .with_context(|| format!("creating destination file {dest_filename}")) + .with_context(context)?; + + let mut dest = std::io::BufWriter::new( + file.try_clone().context("cloning I/O handle").with_context(context)?, + ); + + let total_len: Option = response + .headers() + .get(reqwest::header::CONTENT_LENGTH) + .and_then(|value| value.to_str().ok()) + .and_then(|value| value.parse().ok()); + + let progress = tokio::spawn({ + let name = name.clone(); + async move { + loop { + match file.metadata().context("could not get file metadata") { + Ok(metadata) => { + let len = metadata.len(); + tracing::info!( + asset = name, + downloaded_bytes = len, + total_bytes = total_len, + "asset download in progress" + ); + } + Err(error) => { + tracing::warn!(%error, "could not get file metadata"); + } + } + tokio::time::sleep(std::time::Duration::from_secs(60)).await; + } + } + }); + + let writing_context = || format!("while writing to destination file at {dest_filename}"); + + let mut response = response.bytes_stream(); + + while let Some(bytes) = + response.try_next().await.context("while downloading file").with_context(context)? + { + dest.write_all(&bytes).with_context(writing_context).with_context(context)?; + } + + progress.abort(); + + let mut file = dest.into_inner().with_context(writing_context).with_context(context)?; + + file.rewind().context("while rewinding asset file")?; + + if !check_sha256(&name, &asset, file)? 
{ + bail!("asset '{name}': sha256 mismatch for file {dest_filename} downloaded from {src}") + } + + Ok(()) +} diff --git a/xtask/src/bench/client.rs b/xtask/src/bench/client.rs new file mode 100644 index 000000000..3e46615cc --- /dev/null +++ b/xtask/src/bench/client.rs @@ -0,0 +1,80 @@ +use anyhow::Context; +use serde::Deserialize; + +#[derive(Debug, Clone)] +pub struct Client { + base_url: Option, + client: reqwest::Client, +} + +impl Client { + pub fn new( + base_url: Option, + api_key: Option<&str>, + timeout: Option, + ) -> anyhow::Result { + let mut headers = reqwest::header::HeaderMap::new(); + if let Some(api_key) = api_key { + headers.append( + reqwest::header::AUTHORIZATION, + reqwest::header::HeaderValue::from_str(&format!("Bearer {api_key}")) + .context("Invalid authorization header")?, + ); + } + + let client = reqwest::ClientBuilder::new().default_headers(headers); + let client = if let Some(timeout) = timeout { client.timeout(timeout) } else { client }; + let client = client.build()?; + Ok(Self { base_url, client }) + } + + pub fn request(&self, method: reqwest::Method, route: &str) -> reqwest::RequestBuilder { + if let Some(base_url) = &self.base_url { + if route.is_empty() { + self.client.request(method, base_url) + } else { + self.client.request(method, format!("{}/{}", base_url, route)) + } + } else { + self.client.request(method, route) + } + } + + pub fn get(&self, route: &str) -> reqwest::RequestBuilder { + self.request(reqwest::Method::GET, route) + } + + pub fn put(&self, route: &str) -> reqwest::RequestBuilder { + self.request(reqwest::Method::PUT, route) + } + + pub fn post(&self, route: &str) -> reqwest::RequestBuilder { + self.request(reqwest::Method::POST, route) + } + + pub fn delete(&self, route: &str) -> reqwest::RequestBuilder { + self.request(reqwest::Method::DELETE, route) + } +} + +#[derive(Debug, Clone, Copy, Deserialize)] +#[serde(rename_all = "SCREAMING_SNAKE_CASE")] +pub enum Method { + Get, + Post, + Patch, + Delete, + Put, +} + +impl From for reqwest::Method { + fn from(value: Method) -> Self { + match value { + Method::Get => Self::GET, + Method::Post => Self::POST, + Method::Patch => Self::PATCH, + Method::Delete => Self::DELETE, + Method::Put => Self::PUT, + } + } +} diff --git a/xtask/src/bench/command.rs b/xtask/src/bench/command.rs new file mode 100644 index 000000000..0f0b5d213 --- /dev/null +++ b/xtask/src/bench/command.rs @@ -0,0 +1,194 @@ +use std::collections::BTreeMap; +use std::fmt::Display; +use std::io::Read as _; + +use anyhow::{bail, Context as _}; +use serde::Deserialize; + +use super::assets::{fetch_asset, Asset}; +use super::client::{Client, Method}; + +#[derive(Clone, Deserialize)] +pub struct Command { + pub route: String, + pub method: Method, + #[serde(default)] + pub body: Body, + #[serde(default)] + pub synchronous: SyncMode, +} + +#[derive(Default, Clone, Deserialize)] +#[serde(untagged)] +pub enum Body { + Inline { + inline: serde_json::Value, + }, + Asset { + asset: String, + }, + #[default] + Empty, +} + +impl Body { + pub fn get( + self, + assets: &BTreeMap, + asset_folder: &str, + ) -> anyhow::Result, &'static str)>> { + Ok(match self { + Body::Inline { inline: body } => Some(( + serde_json::to_vec(&body) + .context("serializing to bytes") + .context("while getting inline body")?, + "application/json", + )), + Body::Asset { asset: name } => Some({ + let context = || format!("while getting body from asset '{name}'"); + let (mut file, format) = + fetch_asset(&name, assets, asset_folder).with_context(context)?; + let 
mut buf = Vec::new(); + file.read_to_end(&mut buf).with_context(context)?; + (buf, format.to_content_type(&name)) + }), + Body::Empty => None, + }) + } +} + +impl Display for Command { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?} {} ({:?})", self.method, self.route, self.synchronous) + } +} + +#[derive(Default, Debug, Clone, Copy, Deserialize)] +pub enum SyncMode { + DontWait, + #[default] + WaitForResponse, + WaitForTask, +} + +pub async fn run_batch( + client: &Client, + batch: &[Command], + assets: &BTreeMap, + asset_folder: &str, +) -> anyhow::Result<()> { + let [.., last] = batch else { return Ok(()) }; + let sync = last.synchronous; + + let mut tasks = tokio::task::JoinSet::new(); + + for command in batch { + // FIXME: you probably don't want to copy assets everytime here + tasks.spawn({ + let client = client.clone(); + let command = command.clone(); + let assets = assets.clone(); + let asset_folder = asset_folder.to_owned(); + + async move { run(client, command, &assets, &asset_folder).await } + }); + } + + while let Some(result) = tasks.join_next().await { + result + .context("panicked while executing command")? + .context("error while executing command")?; + } + + match sync { + SyncMode::DontWait => {} + SyncMode::WaitForResponse => {} + SyncMode::WaitForTask => wait_for_tasks(client).await?, + } + + Ok(()) +} + +async fn wait_for_tasks(client: &Client) -> anyhow::Result<()> { + loop { + let response = client + .get("tasks?statuses=enqueued,processing") + .send() + .await + .context("could not wait for tasks")?; + let response: serde_json::Value = response + .json() + .await + .context("could not deserialize response to JSON") + .context("could not wait for tasks")?; + match response.get("total") { + Some(serde_json::Value::Number(number)) => { + let number = number.as_u64().with_context(|| { + format!("waiting for tasks: could not parse 'total' as integer, got {}", number) + })?; + if number == 0 { + break; + } else { + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + continue; + } + } + Some(thing_else) => { + bail!(format!( + "waiting for tasks: could not parse 'total' as a number, got '{thing_else}'" + )) + } + None => { + bail!(format!( + "waiting for tasks: expected response to contain 'total', got '{response}'" + )) + } + } + } + Ok(()) +} + +#[tracing::instrument(skip(client, command, assets, asset_folder), fields(command = %command))] +pub async fn run( + client: Client, + mut command: Command, + assets: &BTreeMap, + asset_folder: &str, +) -> anyhow::Result<()> { + // memtake the body here to leave an empty body in its place, so that command is not partially moved-out + let body = std::mem::take(&mut command.body) + .get(assets, asset_folder) + .with_context(|| format!("while getting body for command {command}"))?; + + let request = client.request(command.method.into(), &command.route); + + let request = if let Some((body, content_type)) = body { + request.body(body).header(reqwest::header::CONTENT_TYPE, content_type) + } else { + request + }; + + let response = + request.send().await.with_context(|| format!("error sending command: {}", command))?; + + let code = response.status(); + if code.is_client_error() { + tracing::error!(%command, %code, "error in workload file"); + let response: serde_json::Value = response + .json() + .await + .context("could not deserialize response as JSON") + .context("parsing error in workload file when sending command")?; + bail!("error in workload file: server responded with error 
diff --git a/xtask/src/bench/dashboard.rs b/xtask/src/bench/dashboard.rs
new file mode 100644
index 000000000..833426207
--- /dev/null
+++ b/xtask/src/bench/dashboard.rs
@@ -0,0 +1,167 @@
+use std::collections::BTreeMap;
+
+use anyhow::{bail, Context};
+use serde_json::json;
+use tokio::signal::ctrl_c;
+use tokio::task::AbortHandle;
+use tracing_trace::processor::span_stats::CallStats;
+use uuid::Uuid;
+
+use super::client::Client;
+use super::env_info;
+use super::workload::Workload;
+
+pub async fn cancel_on_ctrl_c(
+    invocation_uuid: Uuid,
+    dashboard_client: Client,
+    abort_handle: AbortHandle,
+) {
+    tracing::info!("press Ctrl-C to cancel the invocation");
+    match ctrl_c().await {
+        Ok(()) => {
+            tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation");
+            mark_as_failed(dashboard_client, invocation_uuid, None).await;
+            abort_handle.abort();
+        }
+        Err(error) => tracing::warn!(
+            error = &error as &dyn std::error::Error,
+            "failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C"
+        ),
+    }
+}
+
+pub async fn mark_as_failed(
+    dashboard_client: Client,
+    invocation_uuid: Uuid,
+    failure_reason: Option<String>,
+) {
+    let response = dashboard_client
+        .post("cancel-invocation")
+        .json(&json!({
+            "invocation_uuid": invocation_uuid,
+            "failure_reason": failure_reason,
+        }))
+        .send()
+        .await;
+    let response = match response {
+        Ok(response) => response,
+        Err(response_error) => {
+            tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
+            return;
+        }
+    };
+
+    if !response.status().is_success() {
+        tracing::error!(
+            %invocation_uuid,
+            "could not mark invocation as failed: {}",
+            response.text().await.unwrap()
+        );
+        return;
+    }
+    tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
+}
+
+pub async fn send_machine_info(
+    dashboard_client: &Client,
+    env: &env_info::Environment,
+) -> anyhow::Result<()> {
+    let response = dashboard_client
+        .put("machine")
+        .json(&json!({"hostname": env.hostname}))
+        .send()
+        .await
+        .context("sending machine information")?;
+    if !response.status().is_success() {
+        bail!(
+            "could not send machine information: {} {}",
+            response.status(),
+            response.text().await.unwrap_or_else(|_| "unknown".into())
+        );
+    }
+    Ok(())
+}
+
+pub async fn create_invocation(
+    dashboard_client: &Client,
+    build_info: build_info::BuildInfo,
+    commit_message: &str,
+    env: env_info::Environment,
+    max_workloads: usize,
+    reason: Option<&str>,
+) -> anyhow::Result<Uuid> {
+    let response = dashboard_client
+        .put("invocation")
+        .json(&json!({
+            "commit": {
+                "sha1": build_info.commit_sha1,
+                "message": commit_message,
+                "commit_date": build_info.commit_timestamp,
+                "branch": build_info.branch,
+                "tag": build_info.describe.and_then(|describe| describe.as_tag()),
+            },
+            "machine_hostname": env.hostname,
+            "max_workloads": max_workloads,
+            "reason": reason
+        }))
+        .send()
+        .await
+        .context("sending invocation")?;
+    if !response.status().is_success() {
+        bail!(
+            "could not send new invocation: {}",
+            response.text().await.unwrap_or_else(|_| "unknown".into())
+        );
+    }
+    let invocation_uuid: Uuid =
+        response.json().await.context("could not deserialize invocation response as JSON")?;
+    Ok(invocation_uuid)
+}
+
+pub async fn create_workload(
+    dashboard_client: &Client,
+    invocation_uuid: Uuid,
+    workload: &Workload,
+) -> anyhow::Result<Uuid> {
+    let response = dashboard_client
+        .put("workload")
+        .json(&json!({
+            "invocation_uuid": invocation_uuid,
+            "name": &workload.name,
+            "max_runs": workload.run_count,
+        }))
+        .send()
+        .await
+        .context("could not create new workload")?;
+
+    if !response.status().is_success() {
+        bail!("creating new workload failed: {}", response.text().await.unwrap())
+    }
+
+    let workload_uuid: Uuid =
+        response.json().await.context("could not deserialize JSON as UUID")?;
+    Ok(workload_uuid)
+}
+
+pub async fn create_run(
+    dashboard_client: Client,
+    workload_uuid: Uuid,
+    report: &BTreeMap<String, CallStats>,
+) -> anyhow::Result<()> {
+    let response = dashboard_client
+        .put("run")
+        .json(&json!({
+            "workload_uuid": workload_uuid,
+            "data": report
+        }))
+        .send()
+        .await
+        .context("sending new run")?;
+    if !response.status().is_success() {
+        bail!(
+            "sending new run failed: {}",
+            response.text().await.unwrap_or_else(|_| "unknown".into())
+        )
+    }
+    Ok(())
+}
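For orientation, these helpers cover the whole dashboard surface used by the bench runner. The routes below are taken verbatim from the calls above and are relative to the /api/v1 base configured in mod.rs; the snippet merely tabulates them:

    // (method, route) pairs exercised by dashboard.rs, for reference only.
    const DASHBOARD_ENDPOINTS: &[(&str, &str)] = &[
        ("PUT", "machine"),            // send_machine_info
        ("PUT", "invocation"),         // create_invocation
        ("PUT", "workload"),           // create_workload
        ("PUT", "run"),                // create_run
        ("POST", "cancel-invocation"), // mark_as_failed
    ];

    fn main() {
        for (method, route) in DASHBOARD_ENDPOINTS {
            println!("{method} /api/v1/{route}");
        }
    }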
diff --git a/xtask/src/bench/env_info.rs b/xtask/src/bench/env_info.rs
new file mode 100644
index 000000000..08dacf915
--- /dev/null
+++ b/xtask/src/bench/env_info.rs
@@ -0,0 +1,75 @@
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct Environment {
+    pub hostname: Option<String>,
+    pub cpu: String,
+
+    /// Advertised or nominal clock speed in Hertz.
+    pub clock_speed: u64,
+
+    /// Total number of bytes of memory provided by the system.
+    pub memory: u64,
+    pub os_type: String,
+    pub software: Vec<VersionInfo>,
+
+    pub user_name: String,
+
+    /// Set to true when the data was gathered by a manual run,
+    /// possibly on a developer machine, instead of the usual benchmark server.
+    pub manual_run: bool,
+}
+
+impl Environment {
+    pub fn generate_from_current_config() -> Self {
+        use sysinfo::System;
+
+        let unknown_string = String::from("Unknown");
+        let mut system = System::new();
+        system.refresh_cpu();
+        system.refresh_cpu_frequency();
+        system.refresh_memory();
+
+        let (cpu, frequency) = match system.cpus().first() {
+            Some(cpu) => (
+                format!("{} @ {:.2}GHz", cpu.brand(), cpu.frequency() as f64 / 1000.0),
+                cpu.frequency() * 1_000_000,
+            ),
+            None => (unknown_string.clone(), 0),
+        };
+
+        let mut software = Vec::new();
+        if let Some(distribution) = System::name() {
+            software
+                .push(VersionInfo { name: distribution, version: String::from("distribution") });
+        }
+        if let Some(kernel) = System::kernel_version() {
+            software.push(VersionInfo { name: kernel, version: String::from("kernel") });
+        }
+        if let Some(os) = System::os_version() {
+            software.push(VersionInfo { name: os, version: String::from("kernel-release") });
+        }
+        if let Some(arch) = System::cpu_arch() {
+            software.push(VersionInfo { name: arch, version: String::from("arch") });
+        }
+
+        Self {
+            hostname: System::host_name(),
+            cpu,
+            clock_speed: frequency,
+            memory: system.total_memory(),
+            os_type: System::long_os_version().unwrap_or(unknown_string.clone()),
+            user_name: System::name().unwrap_or(unknown_string.clone()),
+            manual_run: false,
+            software,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct VersionInfo {
+    pub name: String,
+    pub version: String,
+}
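Because the structs are serialized with `rename_all = "camelCase"`, the dashboard sees camelCase keys on the wire. A self-contained mirror of the two structs above, populated with made-up values, shows the resulting JSON:

    use serde::Serialize;

    #[derive(Serialize)]
    #[serde(rename_all = "camelCase")]
    struct Environment {
        hostname: Option<String>,
        cpu: String,
        clock_speed: u64,
        memory: u64,
        os_type: String,
        software: Vec<VersionInfo>,
        user_name: String,
        manual_run: bool,
    }

    #[derive(Serialize)]
    #[serde(rename_all = "camelCase")]
    struct VersionInfo {
        name: String,
        version: String,
    }

    fn main() {
        let env = Environment {
            hostname: Some("bench-01".into()), // made-up machine
            cpu: "Example CPU @ 3.50GHz".into(),
            clock_speed: 3_500_000_000,
            memory: 34_359_738_368,
            os_type: "Linux 6.1".into(),
            software: vec![VersionInfo { name: "6.1.0".into(), version: "kernel".into() }],
            user_name: "Linux".into(),
            manual_run: false,
        };
        // Prints keys like "clockSpeed", "osType", "manualRun".
        println!("{}", serde_json::to_string_pretty(&env).unwrap());
    }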
diff --git a/xtask/src/bench/meili_process.rs b/xtask/src/bench/meili_process.rs
new file mode 100644
index 000000000..99f6f4ea6
--- /dev/null
+++ b/xtask/src/bench/meili_process.rs
@@ -0,0 +1,112 @@
+use std::collections::BTreeMap;
+
+use anyhow::{bail, Context as _};
+
+use super::assets::Asset;
+use super::client::Client;
+use super::workload::Workload;
+
+pub async fn kill(mut meilisearch: tokio::process::Child) {
+    if let Err(error) = meilisearch.kill().await {
+        tracing::warn!(
+            error = &error as &dyn std::error::Error,
+            "while terminating Meilisearch server"
+        )
+    }
+}
+
+#[tracing::instrument]
+pub async fn build() -> anyhow::Result<()> {
+    let mut command = tokio::process::Command::new("cargo");
+    command.arg("build").arg("--release").arg("-p").arg("meilisearch");
+
+    command.kill_on_drop(true);
+
+    let mut builder = command.spawn().context("error building Meilisearch")?;
+
+    if !builder.wait().await.context("could not build Meilisearch")?.success() {
+        bail!("failed building Meilisearch")
+    }
+
+    Ok(())
+}
+
+#[tracing::instrument(skip(client, master_key, workload), fields(workload = workload.name))]
+pub async fn start(
+    client: &Client,
+    master_key: Option<&str>,
+    workload: &Workload,
+    asset_folder: &str,
+) -> anyhow::Result<tokio::process::Child> {
+    let mut command = tokio::process::Command::new("cargo");
+    command
+        .arg("run")
+        .arg("--release")
+        .arg("-p")
+        .arg("meilisearch")
+        .arg("--bin")
+        .arg("meilisearch")
+        .arg("--");
+
+    command.arg("--db-path").arg("./_xtask_benchmark.ms");
+    if let Some(master_key) = master_key {
+        command.arg("--master-key").arg(master_key);
+    }
+    command.arg("--experimental-enable-logs-route");
+
+    for extra_arg in workload.extra_cli_args.iter() {
+        command.arg(extra_arg);
+    }
+
+    command.kill_on_drop(true);
+
+    let mut meilisearch = command.spawn().context("error starting Meilisearch")?;
+
+    wait_for_health(client, &mut meilisearch, &workload.assets, asset_folder).await?;
+
+    Ok(meilisearch)
+}
+
+async fn wait_for_health(
+    client: &Client,
+    meilisearch: &mut tokio::process::Child,
+    assets: &BTreeMap<String, Asset>,
+    asset_folder: &str,
+) -> anyhow::Result<()> {
+    for i in 0..100 {
+        let res = super::command::run(client.clone(), health_command(), assets, asset_folder).await;
+        if res.is_ok() {
+            // check that it really is the current Meilisearch instance that answered us
+            if let Some(exit_code) =
+                meilisearch.try_wait().context("cannot check Meilisearch server process status")?
+            {
+                tracing::error!("got a health response from a different process");
+                bail!("Meilisearch server exited early with code {exit_code}");
+            }
+
+            return Ok(());
+        }
+        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
+        // check whether the Meilisearch instance exited early (cut the wait)
+        if let Some(exit_code) =
+            meilisearch.try_wait().context("cannot check Meilisearch server process status")?
+        {
+            bail!("Meilisearch server exited early with code {exit_code}");
+        }
+        tracing::debug!(attempt = i, "waiting for Meilisearch to come up");
+    }
+    bail!("Meilisearch is not responding")
+}
+
+fn health_command() -> super::command::Command {
+    super::command::Command {
+        route: "/health".into(),
+        method: super::client::Method::Get,
+        body: Default::default(),
+        synchronous: super::command::SyncMode::WaitForResponse,
+    }
+}
+
+pub fn delete_db() {
+    let _ = std::fs::remove_dir_all("./_xtask_benchmark.ms");
+}
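Put together, `start` above boots the server with a fixed database path and the logs route enabled, then polls /health (up to 100 attempts at 500ms intervals, roughly 50 seconds). The sketch below rebuilds the effective command line as a plain argument vector, with a made-up master key and no workload-specific extra args:

    fn main() {
        let master_key = Some("bench-master-key"); // made-up key for illustration
        let mut argv = vec![
            "cargo", "run", "--release", "-p", "meilisearch",
            "--bin", "meilisearch", "--",
            "--db-path", "./_xtask_benchmark.ms",
        ];
        if let Some(key) = master_key {
            argv.extend(["--master-key", key]);
        }
        argv.push("--experimental-enable-logs-route");
        // any `extra_cli_args` from the workload would be appended here
        println!("{}", argv.join(" "));
    }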
diff --git a/xtask/src/bench/mod.rs b/xtask/src/bench/mod.rs
new file mode 100644
index 000000000..62c11b604
--- /dev/null
+++ b/xtask/src/bench/mod.rs
@@ -0,0 +1,203 @@
+mod assets;
+mod client;
+mod command;
+mod dashboard;
+mod env_info;
+mod meili_process;
+mod workload;
+
+use std::path::PathBuf;
+
+use anyhow::Context;
+use clap::Parser;
+use tracing_subscriber::fmt::format::FmtSpan;
+use tracing_subscriber::layer::SubscriberExt;
+use tracing_subscriber::Layer;
+
+use self::client::Client;
+use self::workload::Workload;
+
+pub fn default_http_addr() -> String {
+    "127.0.0.1:7700".to_string()
+}
+
+pub fn default_report_folder() -> String {
+    "./bench/reports/".into()
+}
+
+pub fn default_asset_folder() -> String {
+    "./bench/assets/".into()
+}
+
+pub fn default_log_filter() -> String {
+    "info".into()
+}
+
+pub fn default_dashboard_url() -> String {
+    "http://localhost:9001".into()
+}
+
+/// Run benchmarks from a workload
+#[derive(Parser, Debug)]
+pub struct BenchDeriveArgs {
+    /// Filename of the workload file; pass multiple filenames
+    /// to run multiple workloads in the specified order.
+    ///
+    /// Each workload run will get its own report file.
+    #[arg(value_name = "WORKLOAD_FILE", last = false)]
+    workload_file: Vec<PathBuf>,
+
+    /// URL of the dashboard.
+    #[arg(long, default_value_t = default_dashboard_url())]
+    dashboard_url: String,
+
+    /// Directory to output reports.
+    #[arg(long, default_value_t = default_report_folder())]
+    report_folder: String,
+
+    /// Directory to store the remote assets.
+    #[arg(long, default_value_t = default_asset_folder())]
+    asset_folder: String,
+
+    /// Log directives.
+    #[arg(short, long, default_value_t = default_log_filter())]
+    log_filter: String,
+
+    /// Benchmark dashboard API key.
+    #[arg(long)]
+    api_key: Option<String>,
+
+    /// Meilisearch master key.
+    #[arg(long)]
+    master_key: Option<String>,
+
+    /// Authentication bearer for fetching assets.
+    #[arg(long)]
+    assets_key: Option<String>,
+
+    /// Reason for the benchmark invocation.
+    #[arg(short, long)]
+    reason: Option<String>,
+}
+
+pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
+    // setup logs
+    let filter: tracing_subscriber::filter::Targets =
+        args.log_filter.parse().context("invalid --log-filter")?;
+
+    let subscriber = tracing_subscriber::registry().with(
+        tracing_subscriber::fmt::layer()
+            .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
+            .with_filter(filter),
+    );
+    tracing::subscriber::set_global_default(subscriber).context("could not setup logging")?;
+
+    // fetch environment and build info
+    let env = env_info::Environment::generate_from_current_config();
+    let build_info = build_info::BuildInfo::from_build();
+
+    // tokio runtime
+    let rt = tokio::runtime::Builder::new_current_thread().enable_io().enable_time().build()?;
+    let _scope = rt.enter();
+
+    // setup clients
+    let assets_client =
+        Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h
+
+    let dashboard_client = Client::new(
+        Some(format!("{}/api/v1", args.dashboard_url)),
+        args.api_key.as_deref(),
+        Some(std::time::Duration::from_secs(60)),
+    )?;
+
+    // Reporting uses its own client, because keeping the stream open to wait for entries
+    // would block any other request. It also has no timeout, because we don't know how
+    // long recovering the full trace will take.
+    let logs_client = Client::new(
+        Some("http://127.0.0.1:7700/logs/stream".into()),
+        args.master_key.as_deref(),
+        None,
+    )?;
+
+    let meili_client = Client::new(
+        Some("http://127.0.0.1:7700".into()),
+        args.master_key.as_deref(),
+        Some(std::time::Duration::from_secs(60)),
+    )?;
+
+    // enter runtime
+
+    rt.block_on(async {
+        dashboard::send_machine_info(&dashboard_client, &env).await?;
+
+        let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap();
+        let max_workloads = args.workload_file.len();
+        let reason: Option<&str> = args.reason.as_deref();
+        let invocation_uuid = dashboard::create_invocation(&dashboard_client, build_info, commit_message, env, max_workloads, reason).await?;
+
+        tracing::info!(workload_count = args.workload_file.len(), "handling workload files");
+
+        // main task
+        let workload_runs = tokio::spawn({
+            let dashboard_client = dashboard_client.clone();
+            async move {
+                for workload_file in args.workload_file.iter() {
+                    let workload: Workload = serde_json::from_reader(
+                        std::fs::File::open(workload_file)
+                            .with_context(|| format!("error opening {}", workload_file.display()))?,
+                    )
+                    .with_context(|| format!("error parsing {} as JSON", workload_file.display()))?;
+
+                    workload::execute(
+                        &assets_client,
+                        &dashboard_client,
+                        &logs_client,
+                        &meili_client,
+                        invocation_uuid,
+                        args.master_key.as_deref(),
+                        workload,
+                        &args,
+                    )
+                    .await?;
+                }
+                Ok::<(), anyhow::Error>(())
+            }
+        });
+
+        // handle ctrl-c
+        let abort_handle = workload_runs.abort_handle();
+        tokio::spawn({
+            let dashboard_client = dashboard_client.clone();
+            dashboard::cancel_on_ctrl_c(invocation_uuid, dashboard_client, abort_handle)
+        });
+
+        // wait for the end of the main task, handle result
+        match workload_runs.await {
+            Ok(Ok(_)) => {
+                tracing::info!("Success");
+                Ok::<(), anyhow::Error>(())
+            }
+            Ok(Err(error)) => {
+                tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard");
+                dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some(error.to_string())).await;
+                tracing::warn!(%invocation_uuid, "invocation marked as failed following error");
+                Err(error)
+            }
+            Err(join_error) => {
+                match join_error.try_into_panic() {
+                    Ok(panic) => {
+                        tracing::error!("invocation panicked, attempting to report the failure to dashboard");
+                        dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some("Panicked".into())).await;
+                        std::panic::resume_unwind(panic)
+                    }
+                    Err(_) => {
+                        tracing::warn!("task was canceled");
+                        Ok(())
+                    }
+                }
+            }
+        }
+    })?;
+
+    Ok(())
+}
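To see how the CLI surface behaves, here is a trimmed, self-contained mirror of `BenchDeriveArgs` (three of the fields above, with the same clap attributes) parsed against a hypothetical invocation; the second workload path is made up:

    use std::path::PathBuf;

    use clap::Parser;

    #[derive(Parser, Debug)]
    struct BenchArgs {
        /// Workload files, executed in order.
        #[arg(value_name = "WORKLOAD_FILE")]
        workload_file: Vec<PathBuf>,

        /// URL of the dashboard.
        #[arg(long, default_value_t = String::from("http://localhost:9001"))]
        dashboard_url: String,

        /// Reason for the benchmark invocation.
        #[arg(short, long)]
        reason: Option<String>,
    }

    fn main() {
        let args = BenchArgs::parse_from([
            "bench",
            "--reason", "local experiment",
            "workloads/movies.json",
            "workloads/hypothetical-second-workload.json",
        ]);
        assert_eq!(args.workload_file.len(), 2);
        assert_eq!(args.dashboard_url, "http://localhost:9001"); // default kicks in
        println!("{args:#?}");
    }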
diff --git a/xtask/src/bench/workload.rs b/xtask/src/bench/workload.rs
new file mode 100644
index 000000000..b3e952f29
--- /dev/null
+++ b/xtask/src/bench/workload.rs
@@ -0,0 +1,262 @@
+use std::collections::BTreeMap;
+use std::fs::File;
+use std::io::{Seek as _, Write as _};
+
+use anyhow::{bail, Context as _};
+use futures_util::TryStreamExt as _;
+use serde::Deserialize;
+use serde_json::json;
+use tokio::task::JoinHandle;
+use uuid::Uuid;
+
+use super::assets::Asset;
+use super::client::Client;
+use super::command::{Command, SyncMode};
+use super::BenchDeriveArgs;
+use crate::bench::{assets, dashboard, meili_process};
+
+#[derive(Deserialize)]
+pub struct Workload {
+    pub name: String,
+    pub run_count: u16,
+    pub extra_cli_args: Vec<String>,
+    pub assets: BTreeMap<String, Asset>,
+    pub commands: Vec<Command>,
+}
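A workload file therefore looks roughly like this. The top-level keys are exact (plain `Deserialize`, so snake_case), while the contents of `assets` and `commands` are only hinted at, since their serde shapes are defined in assets.rs and command.rs:

    use serde_json::json;

    fn main() {
        let workload = json!({
            "name": "movies",
            "run_count": 3,
            "extra_cli_args": [],
            "assets": { /* asset name -> asset definition, see assets.rs */ },
            "commands": [ /* see command.rs */ ]
        });
        println!("{workload:#}");
    }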
+
+async fn run_commands(
+    dashboard_client: &Client,
+    logs_client: &Client,
+    meili_client: &Client,
+    workload_uuid: Uuid,
+    workload: &Workload,
+    args: &BenchDeriveArgs,
+    run_number: u16,
+) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
+    let report_folder = &args.report_folder;
+    let workload_name = &workload.name;
+
+    std::fs::create_dir_all(report_folder)
+        .with_context(|| format!("could not create report directory at {report_folder}"))?;
+
+    let trace_filename = format!("{report_folder}/{workload_name}-{run_number}-trace.json");
+    let report_filename = format!("{report_folder}/{workload_name}-{run_number}-report.json");
+
+    let report_handle = start_report(logs_client, trace_filename).await?;
+
+    for batch in workload
+        .commands
+        .as_slice()
+        .split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
+    {
+        super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder)
+            .await?;
+    }
+
+    let processor =
+        stop_report(dashboard_client, logs_client, workload_uuid, report_filename, report_handle)
+            .await?;
+
+    Ok(processor)
+}
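The `split_inclusive` call above is what turns the command list into batches: every command that is not `DontWait` closes its batch, and `run_batch` (command.rs) then waits according to that closing command's `SyncMode`. A standalone sketch of the grouping:

    fn main() {
        #[derive(Debug)]
        enum SyncMode { DontWait, WaitForResponse, WaitForTask }

        let commands = [
            SyncMode::DontWait,
            SyncMode::DontWait,
            SyncMode::WaitForTask,     // closes the first batch
            SyncMode::WaitForResponse, // a batch of its own
            SyncMode::DontWait,        // trailing batch, nothing to wait for
        ];

        let batches: Vec<&[SyncMode]> = commands
            .split_inclusive(|mode| !matches!(mode, SyncMode::DontWait))
            .collect();

        // [[DontWait, DontWait, WaitForTask], [WaitForResponse], [DontWait]]
        println!("{batches:?}");
    }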
+
+#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
+#[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))]
+pub async fn execute(
+    assets_client: &Client,
+    dashboard_client: &Client,
+    logs_client: &Client,
+    meili_client: &Client,
+    invocation_uuid: Uuid,
+    master_key: Option<&str>,
+    workload: Workload,
+    args: &BenchDeriveArgs,
+) -> anyhow::Result<()> {
+    assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
+
+    let workload_uuid =
+        dashboard::create_workload(dashboard_client, invocation_uuid, &workload).await?;
+
+    let mut tasks = Vec::new();
+
+    for i in 0..workload.run_count {
+        tasks.push(
+            execute_run(
+                dashboard_client,
+                logs_client,
+                meili_client,
+                workload_uuid,
+                master_key,
+                &workload,
+                args,
+                i,
+            )
+            .await?,
+        );
+    }
+
+    let mut reports = Vec::with_capacity(workload.run_count as usize);
+
+    for task in tasks {
+        reports.push(
+            task.await
+                .context("task panicked while processing report")?
+                .context("task failed while processing report")?,
+        );
+    }
+
+    tracing::info!(workload = workload.name, "Successful workload");
+
+    Ok(())
+}
+
+#[allow(clippy::too_many_arguments)] // not best code quality, but this is a benchmark runner
+#[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))]
+async fn execute_run(
+    dashboard_client: &Client,
+    logs_client: &Client,
+    meili_client: &Client,
+    workload_uuid: Uuid,
+    master_key: Option<&str>,
+    workload: &Workload,
+    args: &BenchDeriveArgs,
+    run_number: u16,
+) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
+    meili_process::delete_db();
+
+    meili_process::build().await?;
+    let meilisearch =
+        meili_process::start(meili_client, master_key, workload, &args.asset_folder).await?;
+
+    let processor = run_commands(
+        dashboard_client,
+        logs_client,
+        meili_client,
+        workload_uuid,
+        workload,
+        args,
+        run_number,
+    )
+    .await?;
+
+    meili_process::kill(meilisearch).await;
+
+    tracing::info!(run_number, "Successful run");
+
+    Ok(processor)
+}
+
+async fn start_report(
+    logs_client: &Client,
+    filename: String,
+) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
+    let report_file = std::fs::File::options()
+        .create(true)
+        .truncate(true)
+        .write(true)
+        .read(true)
+        .open(&filename)
+        .with_context(|| format!("could not create file at {filename}"))?;
+    let mut report_file = std::io::BufWriter::new(report_file);
+
+    let response = logs_client
+        .post("")
+        .json(&json!({
+            "mode": "profile",
+            "target": "indexing::=trace"
+        }))
+        .send()
+        .await
+        .context("failed to start report")?;
+
+    let code = response.status();
+    if code.is_client_error() {
+        tracing::error!(%code, "request error when trying to start report");
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response as JSON")
+            .context("response error when trying to start report")?;
+        bail!(
+            "request error when trying to start report: server responded with error code {code} and '{response}'"
+        )
+    } else if code.is_server_error() {
+        tracing::error!(%code, "server error when trying to start report");
+        let response: serde_json::Value = response
+            .json()
+            .await
+            .context("could not deserialize response as JSON")
+            .context("response error when trying to start report")?;
+        bail!("server error when trying to start report: server responded with error code {code} and '{response}'")
+    }
+
+    Ok(tokio::task::spawn(async move {
+        let mut stream = response.bytes_stream();
+        while let Some(bytes) = stream.try_next().await.context("while waiting for report")? {
+            report_file
+                .write_all(&bytes)
+                .with_context(|| format!("while writing report to {filename}"))?;
+        }
+        report_file.into_inner().with_context(|| format!("while writing report to {filename}"))
+    }))
+}
+
+async fn stop_report(
+    dashboard_client: &Client,
+    logs_client: &Client,
+    workload_uuid: Uuid,
+    filename: String,
+    report_handle: tokio::task::JoinHandle<anyhow::Result<File>>,
+) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
+    let response = logs_client.delete("").send().await.context("while stopping report")?;
+    if !response.status().is_success() {
+        bail!("received HTTP {} while stopping report", response.status())
+    }
+
+    let mut file = tokio::time::timeout(std::time::Duration::from_secs(1000), report_handle)
+        .await
+        .context("while waiting for the end of the report")?
+        .context("report writing task panicked")?
+        .context("while writing report")?;
+
+    file.rewind().context("while rewinding report file")?;
+
+    let process_handle = tokio::task::spawn({
+        let dashboard_client = dashboard_client.clone();
+        async move {
+            let span = tracing::info_span!("processing trace to report", filename);
+            let _guard = span.enter();
+            let report = tracing_trace::processor::span_stats::to_call_stats(
+                tracing_trace::TraceReader::new(std::io::BufReader::new(file)),
+            )
+            .context("could not convert trace to report")?;
+            let context = || format!("writing report to {filename}");
+
+            dashboard::create_run(dashboard_client, workload_uuid, &report).await?;
+
+            let mut output_file = std::io::BufWriter::new(
+                std::fs::File::options()
+                    .create(true)
+                    .truncate(true)
+                    .write(true)
+                    .read(true)
+                    .open(&filename)
+                    .with_context(context)?,
+            );
+
+            for (key, value) in report {
+                serde_json::to_writer(&mut output_file, &json!({key: value}))
+                    .context("serializing span stat")?;
+                writeln!(&mut output_file).with_context(context)?;
+            }
+            output_file.flush().with_context(context)?;
+            let mut output_file = output_file.into_inner().with_context(context)?;
+
+            output_file.rewind().context("could not rewind output_file").with_context(context)?;
+
+            Ok(output_file)
+        }
+    });
+
+    Ok(process_handle)
+}
diff --git a/xtask/src/lib.rs b/xtask/src/lib.rs
new file mode 100644
index 000000000..cbda260db
--- /dev/null
+++ b/xtask/src/lib.rs
@@ -0,0 +1 @@
+pub mod bench;
diff --git a/xtask/src/main.rs b/xtask/src/main.rs
index 6570dc67b..b81424666 100644
--- a/xtask/src/main.rs
+++ b/xtask/src/main.rs
@@ -1,6 +1,7 @@
 use std::collections::HashSet;
 
 use clap::Parser;
+use xtask::bench::BenchDeriveArgs;
 
 /// List features available in the workspace
 #[derive(Parser, Debug)]
@@ -17,13 +18,16 @@ struct ListFeaturesDeriveArgs {
 #[command(bin_name = "cargo xtask")]
 enum Command {
     ListFeatures(ListFeaturesDeriveArgs),
+    Bench(BenchDeriveArgs),
 }
 
-fn main() {
+fn main() -> anyhow::Result<()> {
     let args = Command::parse();
     match args {
         Command::ListFeatures(args) => list_features(args),
+        Command::Bench(args) => xtask::bench::run(args)?,
     }
+    Ok(())
 }
 
 fn list_features(args: ListFeaturesDeriveArgs) {
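Each run also leaves a local copy of what was sent to the dashboard: `stop_report` rewrites the report file as one JSON object per line, each mapping a span name to its call stats. A rough sketch of consuming such a file, with a made-up path following the "{report_folder}/{workload}-{run}-report.json" pattern, treating each entry as opaque JSON since `CallStats` is defined in `tracing_trace`:

    use std::io::BufRead as _;

    fn main() -> anyhow::Result<()> {
        // Made-up path; the default report folder is ./bench/reports/.
        let file = std::fs::File::open("./bench/reports/movies-0-report.json")?;
        for line in std::io::BufReader::new(file).lines() {
            // One `{span_name: CallStats}` object per line.
            let entry: serde_json::Value = serde_json::from_str(&line?)?;
            println!("{entry}");
        }
        Ok(())
    }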