diff --git a/.github/workflows/milestone-workflow.yml b/.github/workflows/milestone-workflow.yml index 2b8b7bf62..2ede3dc21 100644 --- a/.github/workflows/milestone-workflow.yml +++ b/.github/workflows/milestone-workflow.yml @@ -110,6 +110,25 @@ jobs: --milestone $MILESTONE_VERSION \ --assignee curquiza + create-update-version-issue: + needs: get-release-version + # Create the changelog issue if the release is not only a patch release + if: github.event.action == 'created' + runs-on: ubuntu-latest + env: + ISSUE_TEMPLATE: issue-template.md + steps: + - uses: actions/checkout@v3 + - name: Download the issue template + run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/update-version-issue.md > $ISSUE_TEMPLATE + - name: Create the issue + run: | + gh issue create \ + --title "Update version in Cargo.toml for $MILESTONE_VERSION" \ + --label 'maintenance' \ + --body-file $ISSUE_TEMPLATE \ + --milestone $MILESTONE_VERSION + # ---------------- # MILESTONE CLOSED # ---------------- diff --git a/Cargo.lock b/Cargo.lock index 3f9171edc..962f30853 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,16 +36,16 @@ dependencies = [ [[package]] name = "actix-http" -version = "3.5.1" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "129d4c88e98860e1758c5de288d1632b07970a16d59bdf7b8d66053d582bb71f" +checksum = "d223b13fd481fc0d1f83bb12659ae774d9e3601814c68a0bc539731698cca743" dependencies = [ "actix-codec", "actix-rt", "actix-service", "actix-tls", "actix-utils", - "ahash 0.8.3", + "ahash 0.8.8", "base64 0.21.7", "bitflags 2.4.1", "brotli", @@ -138,9 +138,9 @@ dependencies = [ [[package]] name = "actix-tls" -version = "3.1.1" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72616e7fbec0aa99c6f3164677fa48ff5a60036d0799c98cab894a44f3e0efc3" +checksum = "d4cce60a2f2b477bc72e5cde0af1812a6e82d8fd85b5570a5dcf2a5bf2c5be5f" dependencies = [ "actix-rt", "actix-service", @@ -148,13 +148,11 @@ dependencies = [ "futures-core", "impl-more", "pin-project-lite", - "rustls 0.21.6", - "rustls-webpki", "tokio", - "tokio-rustls 0.23.4", + "tokio-rustls", "tokio-util", "tracing", - "webpki-roots 0.22.6", + "webpki-roots", ] [[package]] @@ -169,9 +167,9 @@ dependencies = [ [[package]] name = "actix-web" -version = "4.4.1" +version = "4.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e43428f3bf11dee6d166b00ec2df4e3aa8cc1606aaa0b7433c146852e2f4e03b" +checksum = "43a6556ddebb638c2358714d853257ed226ece6023ef9364f23f0c70737ea984" dependencies = [ "actix-codec", "actix-http", @@ -183,7 +181,7 @@ dependencies = [ "actix-tls", "actix-utils", "actix-web-codegen", - "ahash 0.8.3", + "ahash 0.8.8", "bytes", "bytestring", "cfg-if", @@ -270,14 +268,15 @@ dependencies = [ [[package]] name = "ahash" -version = "0.8.3" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" dependencies = [ "cfg-if", "getrandom", "once_cell", "version_check", + "zerocopy", ] [[package]] @@ -494,7 +493,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" -version = "1.6.1" +version = "1.7.0" dependencies = [ "anyhow", "bytes", @@ -535,6 +534,26 @@ dependencies = [ "serde", ] +[[package]] +name = "bindgen" +version = "0.68.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078" +dependencies = [ + "bitflags 2.4.1", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.48", +] + [[package]] name = "bit-set" version = "0.5.3" @@ -814,9 +833,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.82" +version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ "jobserver", "libc", @@ -831,6 +850,15 @@ dependencies = [ "smallvec", ] +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -915,6 +943,17 @@ dependencies = [ "inout", ] +[[package]] +name = "clang-sys" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1" +dependencies = [ + "glob", + "libc", + "libloading", +] + [[package]] name = "clap" version = "4.4.17" @@ -961,6 +1000,18 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" +[[package]] +name = "color-spantrace" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2" +dependencies = [ + "once_cell", + "owo-colors", + "tracing-core", + "tracing-error", +] + [[package]] name = "colorchoice" version = "1.0.0" @@ -1286,6 +1337,15 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + [[package]] name = "deduplicating_array" version = "0.1.5" @@ -1476,13 +1536,12 @@ dependencies = [ [[package]] name = "dump" -version = "1.6.1" +version = "1.7.0" dependencies = [ "anyhow", "big_s", "flate2", "http 0.2.11", - "log", "maplit", "meili-snap", "meilisearch-auth", @@ -1496,6 +1555,7 @@ dependencies = [ "tempfile", "thiserror", "time", + "tracing", "uuid", ] @@ -1720,7 +1780,7 @@ dependencies = [ [[package]] name = "file-store" -version = "1.6.1" +version = "1.7.0" dependencies = [ "faux", "tempfile", @@ -1742,7 +1802,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.6.1" +version = "1.7.0" dependencies = [ "insta", "nom", @@ -1773,7 +1833,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.6.1" +version = "1.7.0" dependencies = [ "criterion", "serde_json", @@ -1891,7 +1951,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.6.1" +version = "1.7.0" dependencies = [ "arbitrary", "clap", @@ -1911,6 +1971,19 @@ dependencies = [ "byteorder", ] +[[package]] +name = "fxprof-processed-profile" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd" +dependencies = [ + 
"bitflags 2.4.1", + "debugid", + "fxhash", + "serde", + "serde_json", +] + [[package]] name = "gemm" version = "0.17.0" @@ -2052,8 +2125,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi", + "wasm-bindgen", ] [[package]] @@ -2177,7 +2252,7 @@ dependencies = [ "atomic-polyfill", "hash32", "rustc_version", - "spin 0.9.8", + "spin", "stable_deref_trait", ] @@ -2346,9 +2421,9 @@ dependencies = [ "futures-util", "http 0.2.11", "hyper", - "rustls 0.21.6", + "rustls", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", ] [[package]] @@ -2856,7 +2931,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d" [[package]] name = "index-scheduler" -version = "1.6.1" +version = "1.7.0" dependencies = [ "anyhow", "big_s", @@ -2869,11 +2944,9 @@ dependencies = [ "file-store", "flate2", "insta", - "log", "meili-snap", "meilisearch-auth", "meilisearch-types", - "nelson", "page_size 0.5.0", "puffin", "roaring", @@ -2883,6 +2956,7 @@ dependencies = [ "tempfile", "thiserror", "time", + "tracing", "ureq", "uuid", ] @@ -3043,7 +3117,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.6.1" +version = "1.7.0" dependencies = [ "criterion", "serde_json", @@ -3051,13 +3125,14 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "8.3.0" +version = "9.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" +checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4" dependencies = [ "base64 0.21.7", + "js-sys", "pem", - "ring 0.16.20", + "ring", "serde", "serde_json", "simple_asn1", @@ -3085,6 +3160,12 @@ version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + [[package]] name = "levenshtein_automata" version = "0.2.1" @@ -3112,6 +3193,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "libloading" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "libm" version = "0.2.7" @@ -3128,6 +3219,17 @@ dependencies = [ "libc", ] +[[package]] +name = "libproc" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "229004ebba9d1d5caf41623f1523b6d52abb47d9f6ab87f7e6fc992e3b854aef" +dependencies = [ + "bindgen", + "errno", + "libc", +] + [[package]] name = "libz-sys" version = "1.1.12" @@ -3555,7 +3657,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.6.1" +version = "1.7.0" dependencies = [ "insta", "md5", @@ -3564,7 +3666,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.6.1" +version = "1.7.0" dependencies = [ "actix-cors", "actix-http", @@ -3586,7 +3688,6 @@ dependencies = [ "deserr", "dump", "either", - "env_logger", "file-store", "flate2", "fst", @@ -3601,7 +3702,6 @@ dependencies = [ "itertools 0.11.0", "jsonwebtoken", "lazy_static", - "log", "manifest-dir-macros", 
"maplit", "meili-snap", @@ -3623,7 +3723,7 @@ dependencies = [ "rayon", "regex", "reqwest", - "rustls 0.20.9", + "rustls", "rustls-pemfile", "segment", "serde", @@ -3644,6 +3744,10 @@ dependencies = [ "tokio", "tokio-stream", "toml", + "tracing", + "tracing-actix-web", + "tracing-subscriber", + "tracing-trace", "url", "urlencoding", "uuid", @@ -3655,7 +3759,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.6.1" +version = "1.7.0" dependencies = [ "base64 0.21.7", "enum-iterator", @@ -3674,7 +3778,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.6.1" +version = "1.7.0" dependencies = [ "actix-web", "anyhow", @@ -3704,7 +3808,7 @@ dependencies = [ [[package]] name = "meilitool" -version = "1.6.1" +version = "1.7.0" dependencies = [ "anyhow", "clap", @@ -3743,7 +3847,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.6.1" +version = "1.7.0" dependencies = [ "arroy", "big_s", @@ -3776,7 +3880,6 @@ dependencies = [ "json-depth-checker", "levenshtein_automata", "liquid", - "log", "logging_timer", "maplit", "md5", @@ -3805,6 +3908,7 @@ dependencies = [ "time", "tokenizers", "tokio", + "tracing", "uuid", ] @@ -3882,9 +3986,10 @@ dependencies = [ ] [[package]] -name = "nelson" -version = "0.1.0" -source = "git+https://github.com/meilisearch/nelson.git?rev=675f13885548fb415ead8fbb447e9e6d9314000a#675f13885548fb415ead8fbb447e9e6d9314000a" +name = "mutually_exclusive_features" +version = "0.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" [[package]] name = "nom" @@ -3916,6 +4021,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-bigint" version = "0.4.3" @@ -4037,6 +4152,18 @@ dependencies = [ "num-traits", ] +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" + [[package]] name = "page_size" version = "0.5.0" @@ -4125,12 +4252,19 @@ dependencies = [ ] [[package]] -name = "pem" -version = "1.1.1" +name = "peeking_take_while" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "pem" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" dependencies = [ - "base64 0.13.1", + "base64 0.21.7", + "serde", ] [[package]] @@ -4141,7 +4275,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "permissive-json-pointer" -version = "1.6.1" +version = "1.7.0" dependencies = [ "big_s", "serde_json", @@ -4243,6 +4377,26 @@ dependencies = [ "siphasher 0.3.11", ] +[[package]] +name = "pin-project" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -4641,20 +4795,20 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.6", + "rustls", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "system-configuration", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots 0.25.3", + "webpki-roots", "winreg", ] @@ -4672,30 +4826,15 @@ checksum = "b9b1a3d5f46d53f4a3478e2be4a5a5ce5108ea58b100dcd139830eae7f79a3a1" [[package]] name = "ring" -version = "0.16.20" +version = "0.17.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", -] - -[[package]] -name = "ring" -version = "0.17.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9babe80d5c16becf6594aa32ad2be8fe08498e7ae60b77de8df700e67f191d7e" +checksum = "688c63d65483050968b2a8937f7995f443e27041a0f7700aa59b0822aedebb74" dependencies = [ "cc", "getrandom", "libc", - "spin 0.9.8", - "untrusted 0.9.0", + "spin", + "untrusted", "windows-sys 0.48.0", ] @@ -4773,24 +4912,12 @@ dependencies = [ [[package]] name = "rustls" -version = "0.20.9" +version = "0.21.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" +checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", - "ring 0.16.20", - "sct", - "webpki", -] - -[[package]] -name = "rustls" -version = "0.21.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1feddffcfcc0b33f5c6ce9a29e341e4cd59c3f78e7ee45f4a40c038b1d6cbb" -dependencies = [ - "log", - "ring 0.16.20", + "ring", "rustls-webpki", "sct", ] @@ -4810,8 +4937,8 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.3", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -4853,12 +4980,12 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "sct" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", + "ring", + "untrusted", ] [[package]] @@ -4994,6 +5121,21 @@ dependencies = [ "digest", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -5109,12 +5251,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name = "spin" version = "0.9.8" @@ -5326,6 +5462,16 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tiktoken-rs" version = "0.5.8" @@ -5466,24 +5612,13 @@ dependencies = [ "syn 2.0.48", ] -[[package]] -name = "tokio-rustls" -version = "0.23.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" -dependencies = [ - "rustls 0.20.9", - "tokio", - "webpki", -] - [[package]] name = "tokio-rustls" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.6", + "rustls", "tokio", ] @@ -5554,17 +5689,29 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "log", "pin-project-lite", "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-actix-web" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78" +dependencies = [ + "actix-web", + "mutually_exclusive_features", + "pin-project", + "tracing", + "uuid", +] + [[package]] name = "tracing-attributes" version = "0.1.27" @@ -5578,11 +5725,63 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-error" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e" +dependencies = [ + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "tracing-trace" +version = "0.1.0" +dependencies = [ + "byte-unit", + 
"color-spantrace", + "fxprof-processed-profile", + "libproc", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-error", + "tracing-subscriber", ] [[package]] @@ -5675,12 +5874,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - [[package]] name = "untrusted" version = "0.9.0" @@ -5697,13 +5890,13 @@ dependencies = [ "flate2", "log", "once_cell", - "rustls 0.21.6", + "rustls", "rustls-webpki", "serde", "serde_json", "socks", "url", - "webpki-roots 0.25.3", + "webpki-roots", ] [[package]] @@ -5758,6 +5951,12 @@ dependencies = [ "serde", ] +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vcpkg" version = "0.2.15" @@ -5907,25 +6106,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ecc0cd7cac091bf682ec5efa18b1cff79d617b84181f38b3951dbe135f607f" -dependencies = [ - "ring 0.16.20", - "untrusted 0.7.1", -] - -[[package]] -name = "webpki-roots" -version = "0.22.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c71e40d7d2c34a5106301fb632274ca37242cd0c9d3e64dbece371a40a2d87" -dependencies = [ - "webpki", -] - [[package]] name = "webpki-roots" version = "0.25.3" @@ -6232,7 +6412,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.6.1" +version = "1.7.0" dependencies = [ "cargo_metadata", "clap", @@ -6287,6 +6467,26 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "zerofrom" version = "0.1.3" diff --git a/Cargo.toml b/Cargo.toml index a0c6c3ac9..11190025a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,12 +16,16 @@ members = [ "json-depth-checker", "benchmarks", "fuzzers", + "tracing-trace", "xtask", ] [workspace.package] -version = "1.6.1" -authors = ["Quentin de Quelen ", "Clément Renault "] +version = "1.7.0" +authors = [ + "Quentin de Quelen ", + "Clément Renault ", +] description = "Meilisearch HTTP server" homepage = "https://meilisearch.com" readme = "README.md" diff --git a/README.md b/README.md index 35b4cb97a..a1c5c2f9d 100644 --- a/README.md +++ b/README.md @@ -41,10 +41,10 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f ## ✨ Features - **Search-as-you-type:** find search results in less than 50 milliseconds -- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#typo-tolerance):** get relevant matches even when queries contain typos and misspellings +- **[Typo 
tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need -- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features#synonyms):** configure synonyms to include more relevant content in your search results +- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data - **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling @@ -61,8 +61,6 @@ You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide. -You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) for an introduction to some of Meilisearch's most popular features. - ## ⚡ Supercharge your Meilisearch experience Say goodbye to server deployment and manual updates with [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). No credit card required. @@ -101,7 +99,7 @@ Meilisearch is a search engine created by [Meili](https://www.welcometothejungle - For feature requests, please visit our [product repository](https://github.com/meilisearch/product/discussions) - Found a bug? Open an [issue](https://github.com/meilisearch/meilisearch/issues)! -- Want to be part of our Discord community? 
[Join us!](https://discord.gg/meilisearch) +- Want to be part of our Discord community? [Join us!](https://discord.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=contact) Thank you for your support! diff --git a/dump/Cargo.toml b/dump/Cargo.toml index 941cec72d..92830c782 100644 --- a/dump/Cargo.toml +++ b/dump/Cargo.toml @@ -14,7 +14,6 @@ license.workspace = true anyhow = "1.0.79" flate2 = "1.0.28" http = "0.2.11" -log = "0.4.20" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } once_cell = "1.19.0" @@ -26,6 +25,7 @@ tar = "0.4.40" tempfile = "3.9.0" thiserror = "1.0.56" time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } +tracing = "0.1.40" uuid = { version = "1.6.1", features = ["serde", "v4"] } [dev-dependencies] diff --git a/dump/src/reader/compat/v1_to_v2.rs b/dump/src/reader/compat/v1_to_v2.rs index 789e8e0b1..0d050497b 100644 --- a/dump/src/reader/compat/v1_to_v2.rs +++ b/dump/src/reader/compat/v1_to_v2.rs @@ -120,7 +120,7 @@ impl From for v2::Settings { criterion.as_ref().map(ToString::to_string) } Err(()) => { - log::warn!( + tracing::warn!( "Could not import the following ranking rule: `{}`.", ranking_rule ); @@ -152,11 +152,11 @@ impl From for Option { use v2::updates::UpdateStatus as UpdateStatusV2; Some(match source { UpdateStatusV1::Enqueued { content } => { - log::warn!( + tracing::warn!( "Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)", content.update_id ); - log::warn!("Task will be skipped in the queue of imported tasks."); + tracing::warn!("Task will be skipped in the queue of imported tasks."); return None; } @@ -229,7 +229,7 @@ impl From for Option { Some(match source { v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments, v1::update::UpdateType::Customs => { - log::warn!("Ignoring task with type 'Customs' that is no longer supported"); + tracing::warn!("Ignoring task with type 'Customs' that is no longer supported"); return None; } v1::update::UpdateType::DocumentsAddition { .. 
} => { @@ -296,7 +296,7 @@ impl From for Option { v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity), v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute), v1::settings::RankingRule::WordsPosition => { - log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); + tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); None } v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness), diff --git a/dump/src/reader/compat/v2_to_v3.rs b/dump/src/reader/compat/v2_to_v3.rs index 1d5f4e153..1d4238290 100644 --- a/dump/src/reader/compat/v2_to_v3.rs +++ b/dump/src/reader/compat/v2_to_v3.rs @@ -146,8 +146,8 @@ impl From for v3::updates::UpdateStatus { started_processing_at: processing.started_processing_at, }), Err(e) => { - log::warn!("Error with task {}: {}", processing.from.update_id, e); - log::warn!("Task will be marked as `Failed`."); + tracing::warn!("Error with task {}: {}", processing.from.update_id, e); + tracing::warn!("Task will be marked as `Failed`."); v3::updates::UpdateStatus::Failed(v3::updates::Failed { from: v3::updates::Processing { from: v3::updates::Enqueued { @@ -172,8 +172,8 @@ impl From for v3::updates::UpdateStatus { enqueued_at: enqueued.enqueued_at, }), Err(e) => { - log::warn!("Error with task {}: {}", enqueued.update_id, e); - log::warn!("Task will be marked as `Failed`."); + tracing::warn!("Error with task {}: {}", enqueued.update_id, e); + tracing::warn!("Task will be marked as `Failed`."); v3::updates::UpdateStatus::Failed(v3::updates::Failed { from: v3::updates::Processing { from: v3::updates::Enqueued { @@ -353,7 +353,7 @@ impl From for v3::Code { "malformed_payload" => v3::Code::MalformedPayload, "missing_payload" => v3::Code::MissingPayload, other => { - log::warn!("Unknown error code {}", other); + tracing::warn!("Unknown error code {}", other); v3::Code::UnretrievableErrorCode } } diff --git a/dump/src/reader/compat/v3_to_v4.rs b/dump/src/reader/compat/v3_to_v4.rs index b4153eb31..244948200 100644 --- a/dump/src/reader/compat/v3_to_v4.rs +++ b/dump/src/reader/compat/v3_to_v4.rs @@ -76,20 +76,20 @@ impl CompatV3ToV4 { let index_uid = match index_uid { Some(uid) => uid, None => { - log::warn!( + tracing::warn!( "Error while importing the update {}.", task.update.id() ); - log::warn!( + tracing::warn!( "The index associated to the uuid `{}` could not be retrieved.", task.uuid.to_string() ); if task.update.is_finished() { // we're fucking with his history but not his data, that's ok-ish. 
- log::warn!("The index-uuid will be set as `unknown`."); + tracing::warn!("The index-uuid will be set as `unknown`."); String::from("unknown") } else { - log::warn!("The task will be ignored."); + tracing::warn!("The task will be ignored."); return None; } } diff --git a/dump/src/reader/compat/v4_to_v5.rs b/dump/src/reader/compat/v4_to_v5.rs index 850e2cccd..aa8441779 100644 --- a/dump/src/reader/compat/v4_to_v5.rs +++ b/dump/src/reader/compat/v4_to_v5.rs @@ -305,7 +305,7 @@ impl From for v5::ResponseError { "invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt, "invalid_api_key_description" => v5::Code::InvalidApiKeyDescription, other => { - log::warn!("Unknown error code {}", other); + tracing::warn!("Unknown error code {}", other); v5::Code::UnretrievableErrorCode } }; diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index 9351ae70d..e00d3a599 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -304,7 +304,7 @@ impl From for v6::ResponseError { "immutable_field" => v6::Code::BadRequest, "api_key_already_exists" => v6::Code::ApiKeyAlreadyExists, other => { - log::warn!("Unknown error code {}", other); + tracing::warn!("Unknown error code {}", other); v6::Code::UnretrievableErrorCode } }; @@ -329,7 +329,7 @@ impl From> for v6::Settings { new_ranking_rules.push(new_rule); } Err(_) => { - log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.") + tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.") } } } diff --git a/dump/src/reader/v6/mod.rs b/dump/src/reader/v6/mod.rs index 4e980e03e..50b9751a2 100644 --- a/dump/src/reader/v6/mod.rs +++ b/dump/src/reader/v6/mod.rs @@ -2,10 +2,10 @@ use std::fs::{self, File}; use std::io::{BufRead, BufReader, ErrorKind}; use std::path::Path; -use log::debug; pub use meilisearch_types::milli; use tempfile::TempDir; use time::OffsetDateTime; +use tracing::debug; use uuid::Uuid; use super::Document; diff --git a/file-store/src/lib.rs b/file-store/src/lib.rs index 75db9bb5f..15c4168bc 100644 --- a/file-store/src/lib.rs +++ b/file-store/src/lib.rs @@ -1,5 +1,5 @@ use std::fs::File as StdFile; -use std::ops::{Deref, DerefMut}; +use std::io::Write; use std::path::{Path, PathBuf}; use std::str::FromStr; @@ -22,20 +22,6 @@ pub enum Error { pub type Result = std::result::Result; -impl Deref for File { - type Target = NamedTempFile; - - fn deref(&self) -> &Self::Target { - &self.file - } -} - -impl DerefMut for File { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.file - } -} - #[derive(Clone, Debug)] pub struct FileStore { path: PathBuf, @@ -56,7 +42,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::new_v4(); let path = self.path.join(uuid.to_string()); - let update_file = File { file, path }; + let update_file = File { file: Some(file), path }; Ok((uuid, update_file)) } @@ -67,7 +53,7 @@ impl FileStore { let file = NamedTempFile::new_in(&self.path)?; let uuid = Uuid::from_u128(uuid); let path = self.path.join(uuid.to_string()); - let update_file = File { file, path }; + let update_file = File { file: Some(file), path }; Ok((uuid, update_file)) } @@ -136,16 +122,40 @@ impl FileStore { pub struct File { path: PathBuf, - file: NamedTempFile, + file: Option, } impl File { + pub fn dry_file() -> Result { + Ok(Self { path: PathBuf::new(), file: None }) + } + pub fn persist(self) -> Result<()> { - self.file.persist(&self.path)?; + if let Some(file) = 
self.file { + file.persist(&self.path)?; + } Ok(()) } } +impl Write for File { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + if let Some(file) = self.file.as_mut() { + file.write(buf) + } else { + Ok(buf.len()) + } + } + + fn flush(&mut self) -> std::io::Result<()> { + if let Some(file) = self.file.as_mut() { + file.flush() + } else { + Ok(()) + } + } +} + #[cfg(test)] mod test { use std::io::Write; diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 4300bc12c..890312854 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -19,7 +19,6 @@ dump = { path = "../dump" } enum-iterator = "1.5.0" file-store = { path = "../file-store" } flate2 = "1.0.28" -log = "0.4.20" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } page_size = "0.5.0" @@ -30,7 +29,13 @@ serde_json = { version = "1.0.111", features = ["preserve_order"] } synchronoise = "1.0.1" tempfile = "3.9.0" thiserror = "1.0.56" -time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } +time = { version = "0.3.31", features = [ + "serde-well-known", + "formatting", + "parsing", + "macros", +] } +tracing = "0.1.40" ureq = "2.9.1" uuid = { version = "1.6.1", features = ["serde", "v4"] } @@ -39,4 +44,3 @@ big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.34.0", features = ["json", "redactions"] } meili-snap = { path = "../meili-snap" } -nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"} diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 01b0ddc1e..8e2eb26a0 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -24,7 +24,6 @@ use std::fs::{self, File}; use std::io::BufWriter; use dump::IndexMetadata; -use log::{debug, error, info, trace}; use meilisearch_types::error::Code; use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; @@ -514,6 +513,7 @@ impl IndexScheduler { /// 3. We get the *next* snapshot to process. /// 4. We get the *next* dump to process. /// 5. We get the *next* tasks to process for a specific index. + #[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")] pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result> { #[cfg(test)] self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; @@ -619,6 +619,7 @@ impl IndexScheduler { /// The list of tasks that were processed. The metadata of each task in the returned /// list is updated accordingly, with the exception of the its date fields /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). 
+ #[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))] pub(crate) fn process_batch(&self, batch: Batch) -> Result> { #[cfg(test)] { @@ -668,9 +669,10 @@ impl IndexScheduler { Ok(()) => { for content_uuid in canceled_tasks_content_uuids { if let Err(error) = self.delete_update_file(content_uuid) { - error!( - "We failed deleting the content file indentified as {}: {}", - content_uuid, error + tracing::error!( + file_content_uuid = %content_uuid, + %error, + "Failed deleting content file" ) } } @@ -969,7 +971,10 @@ impl IndexScheduler { match res { Ok(_) => (), - Err(e) => error!("Could not write the stats of the index {}", e), + Err(e) => tracing::error!( + error = &e as &dyn std::error::Error, + "Could not write the stats of the index" + ), } Ok(tasks) @@ -997,7 +1002,7 @@ impl IndexScheduler { builder.set_primary_key(primary_key); let must_stop_processing = self.must_stop_processing.clone(); builder.execute( - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.get(), )?; index_wtxn.commit()?; @@ -1024,7 +1029,10 @@ impl IndexScheduler { match res { Ok(_) => (), - Err(e) => error!("Could not write the stats of the index {}", e), + Err(e) => tracing::error!( + error = &e as &dyn std::error::Error, + "Could not write the stats of the index" + ), } Ok(vec![task]) @@ -1143,6 +1151,11 @@ impl IndexScheduler { /// /// ## Return /// The list of processed tasks. + #[tracing::instrument( + level = "trace", + skip(self, index_wtxn, index), + target = "indexing::scheduler" + )] fn apply_index_operation<'i>( &self, index_wtxn: &mut RwTxn<'i>, @@ -1203,7 +1216,7 @@ impl IndexScheduler { milli::update::Settings::new(index_wtxn, index, indexer_config); builder.set_primary_key(primary_key); builder.execute( - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.clone().get(), )?; primary_key_has_been_set = true; @@ -1222,7 +1235,7 @@ impl IndexScheduler { index, indexer_config, config, - |indexing_step| trace!("update: {:?}", indexing_step), + |indexing_step| tracing::trace!(?indexing_step, "Update"), || must_stop_processing.get(), )?; @@ -1294,7 +1307,7 @@ impl IndexScheduler { if !tasks.iter().all(|res| res.error.is_some()) { let addition = builder.execute()?; - info!("document addition done: {:?}", addition); + tracing::info!(indexing_result = ?addition, "document indexing done"); } else if primary_key_has_been_set { // Everything failed but we've set a primary key. // We need to remove it. 
@@ -1302,7 +1315,7 @@ impl IndexScheduler { milli::update::Settings::new(index_wtxn, index, indexer_config); builder.reset_primary_key(); builder.execute( - |indexing_step| trace!("update: {:?}", indexing_step), + |indexing_step| tracing::trace!(update = ?indexing_step), || must_stop_processing.clone().get(), )?; } @@ -1372,7 +1385,7 @@ impl IndexScheduler { let must_stop_processing = self.must_stop_processing.clone(); builder.execute( - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.get(), )?; @@ -1584,7 +1597,7 @@ fn delete_document_by_filter<'a>( index, indexer_config, config, - |indexing_step| debug!("update: {:?}", indexing_step), + |indexing_step| tracing::debug!(update = ?indexing_step), || must_stop_processing.get(), )?; diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs index bbe526460..223b84762 100644 --- a/index-scheduler/src/error.rs +++ b/index-scheduler/src/error.rs @@ -48,6 +48,8 @@ impl From for Code { pub enum Error { #[error("{1}")] WithCustomErrorCode(Code, Box), + #[error("Received bad task id: {received} should be >= to {expected}.")] + BadTaskId { received: TaskId, expected: TaskId }, #[error("Index `{0}` not found.")] IndexNotFound(String), #[error("Index `{0}` already exists.")] @@ -161,6 +163,7 @@ impl Error { match self { Error::IndexNotFound(_) | Error::WithCustomErrorCode(_, _) + | Error::BadTaskId { .. } | Error::IndexAlreadyExists(_) | Error::SwapDuplicateIndexFound(_) | Error::SwapDuplicateIndexesFound(_) @@ -205,6 +208,7 @@ impl ErrorCode for Error { fn error_code(&self) -> Code { match self { Error::WithCustomErrorCode(code, _) => *code, + Error::BadTaskId { .. } => Code::BadRequest, Error::IndexNotFound(_) => Code::IndexNotFound, Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists, Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound, diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index dad5e86f4..4fd5bd0e7 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -30,19 +30,6 @@ impl RoFeatures { self.runtime } - pub fn check_score_details(&self) -> Result<()> { - if self.runtime.score_details { - Ok(()) - } else { - Err(FeatureNotEnabledError { - disabled_action: "Computing score details", - feature: "score details", - issue_link: "https://github.com/meilisearch/product/discussions/674", - } - .into()) - } - } - pub fn check_metrics(&self) -> Result<()> { if self.runtime.metrics { Ok(()) @@ -56,6 +43,19 @@ impl RoFeatures { } } + pub fn check_logs_route(&self) -> Result<()> { + if self.runtime.logs_route { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action: "getting logs through the `/logs/stream` route", + feature: "logs route", + issue_link: "https://github.com/orgs/meilisearch/discussions/721", + } + .into()) + } + } + pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> { if self.runtime.vector_store { Ok(()) @@ -94,6 +94,7 @@ impl FeatureData { runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default(); let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { metrics: instance_features.metrics || persisted_features.metrics, + logs_route: instance_features.logs_route || persisted_features.logs_route, ..persisted_features })); diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 58ec2bf11..14908120c 100644 --- 
a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock}; use std::time::Duration; use std::{fs, thread}; -use log::error; use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::{FieldDistribution, Index}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; +use tracing::error; use uuid::Uuid; use self::index_map::IndexMap; diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index 42f041578..988e75b81 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { let IndexScheduler { autobatching_enabled, + cleanup_enabled: _, must_stop_processing: _, processing_tasks, file_store, diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 946a2a33e..1c3b93bce 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -264,6 +264,9 @@ pub struct IndexSchedulerOptions { /// Set to `true` iff the index scheduler is allowed to automatically /// batch tasks together, to process multiple tasks at once. pub autobatching_enabled: bool, + /// Set to `true` iff the index scheduler is allowed to automatically + /// delete the finished tasks when there are too many tasks. + pub cleanup_enabled: bool, /// The maximum number of tasks stored in the task queue before starting /// to auto schedule task deletions. pub max_number_of_tasks: usize, @@ -324,6 +327,9 @@ pub struct IndexScheduler { /// Whether auto-batching is enabled or not. pub(crate) autobatching_enabled: bool, + /// Whether we should automatically cleanup the task queue or not. + pub(crate) cleanup_enabled: bool, + /// The max number of tasks allowed before the scheduler starts to delete /// the finished tasks automatically. 
pub(crate) max_number_of_tasks: usize, @@ -390,6 +396,7 @@ impl IndexScheduler { index_mapper: self.index_mapper.clone(), wake_up: self.wake_up.clone(), autobatching_enabled: self.autobatching_enabled, + cleanup_enabled: self.cleanup_enabled, max_number_of_tasks: self.max_number_of_tasks, max_number_of_batched_tasks: self.max_number_of_batched_tasks, puffin_frame: self.puffin_frame.clone(), @@ -491,6 +498,7 @@ impl IndexScheduler { wake_up: Arc::new(SignalEvent::auto(true)), puffin_frame: Arc::new(puffin::GlobalFrameView::default()), autobatching_enabled: options.autobatching_enabled, + cleanup_enabled: options.cleanup_enabled, max_number_of_tasks: options.max_number_of_tasks, max_number_of_batched_tasks: options.max_number_of_batched_tasks, dumps_path: options.dumps_path, @@ -535,17 +543,17 @@ impl IndexScheduler { let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) { DEFAULT_BUDGET } else { - log::debug!("determining budget with dichotomic search"); + tracing::debug!("determining budget with dichotomic search"); utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| { Self::is_good_heed(tasks_path, map_size) }) }; - log::debug!("memmap budget: {budget}B"); + tracing::debug!("memmap budget: {budget}B"); let mut budget = budget / 2; if task_db_size > (budget / 2) { task_db_size = clamp_to_page_size(budget * 2 / 5); - log::debug!( + tracing::debug!( "Decreasing max size of task DB to {task_db_size}B due to constrained memory space" ); } @@ -555,13 +563,13 @@ impl IndexScheduler { let budget = budget; let task_db_size = task_db_size; - log::debug!("index budget: {budget}B"); + tracing::debug!("index budget: {budget}B"); let mut index_count = budget / base_map_size; if index_count < 2 { // take a bit less than half than the budget to make sure we can always afford to open an index let map_size = (budget * 2) / 5; // single index of max budget - log::debug!("1 index of {map_size}B can be opened simultaneously."); + tracing::debug!("1 index of {map_size}B can be opened simultaneously."); return IndexBudget { map_size, index_count: 1, task_db_size }; } // give us some space for an additional index when the cache is already full @@ -570,7 +578,7 @@ impl IndexScheduler { if index_count > max_index_count { index_count = max_index_count; } - log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously."); + tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously."); IndexBudget { map_size: base_map_size, index_count, task_db_size } } @@ -617,7 +625,7 @@ impl IndexScheduler { Ok(TickOutcome::TickAgain(_)) => (), Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), Err(e) => { - log::error!("{e}"); + tracing::error!("{e}"); // Wait one second when an irrecoverable error occurs. if !e.is_recoverable() { std::thread::sleep(Duration::from_secs(1)); @@ -634,15 +642,15 @@ impl IndexScheduler { let mut file = match File::create(format!("{}.puffin", now)) { Ok(file) => file, Err(e) => { - log::error!("{e}"); + tracing::error!("{e}"); continue; } }; if let Err(e) = frame_view.save_to_writer(&mut file) { - log::error!("{e}"); + tracing::error!("{e}"); } if let Err(e) = file.sync_all() { - log::error!("{e}"); + tracing::error!("{e}"); } // We erase this frame view as it is no more useful. We want to // measure the new frames now that we exported the previous ones. @@ -993,7 +1001,12 @@ impl IndexScheduler { /// Register a new task in the scheduler. /// /// If it fails and data was associated with the task, it tries to delete the associated data. 
- pub fn register(&self, kind: KindWithContent) -> Result { + pub fn register( + &self, + kind: KindWithContent, + task_id: Option, + dry_run: bool, + ) -> Result { let mut wtxn = self.env.write_txn()?; // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task @@ -1003,8 +1016,16 @@ impl IndexScheduler { return Err(Error::NoSpaceLeftInTaskQueue); } + let next_task_id = self.next_task_id(&wtxn)?; + + if let Some(uid) = task_id { + if uid < next_task_id { + return Err(Error::BadTaskId { received: uid, expected: next_task_id }); + } + } + let mut task = Task { - uid: self.next_task_id(&wtxn)?, + uid: task_id.unwrap_or(next_task_id), enqueued_at: OffsetDateTime::now_utc(), started_at: None, finished_at: None, @@ -1021,6 +1042,11 @@ impl IndexScheduler { // (that it does not contain duplicate indexes). check_index_swap_validity(&task)?; + // At this point the task is going to be registered and no further checks will be done + if dry_run { + return Ok(task); + } + // Get rid of the mutability. let task = task; @@ -1085,8 +1111,12 @@ impl IndexScheduler { /// The returned file and uuid can be used to associate /// some data to a task. The file will be kept until /// the task has been fully processed. - pub fn create_update_file(&self) -> Result<(Uuid, file_store::File)> { - Ok(self.file_store.new_update()?) + pub fn create_update_file(&self, dry_run: bool) -> Result<(Uuid, file_store::File)> { + if dry_run { + Ok((Uuid::nil(), file_store::File::dry_file()?)) + } else { + Ok(self.file_store.new_update()?) + } } #[cfg(test)] @@ -1126,7 +1156,9 @@ impl IndexScheduler { self.breakpoint(Breakpoint::Start); } - self.cleanup_task_queue()?; + if self.cleanup_enabled { + self.cleanup_task_queue()?; + } let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; let batch = @@ -1190,10 +1222,10 @@ impl IndexScheduler { self.update_task(&mut wtxn, &task) .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; if let Err(e) = self.delete_persisted_task_data(&task) { - log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); + tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); } } - log::info!("A batch of tasks was successfully completed."); + tracing::info!("A batch of tasks was successfully completed."); } // If we have an abortion error we must stop the tick here and re-schedule tasks. Err(Error::Milli(milli::Error::InternalError( @@ -1247,7 +1279,7 @@ impl IndexScheduler { self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; if let Err(e) = self.delete_persisted_task_data(&task) { - log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); + tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); } self.update_task(&mut wtxn, &task) .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; @@ -1341,7 +1373,7 @@ impl IndexScheduler { }; if let Err(e) = request.send(reader) { - log::error!("While sending data to the webhook: {e}"); + tracing::error!("While sending data to the webhook: {e}"); } } @@ -1367,12 +1399,12 @@ impl IndexScheduler { // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete // the deletion tasks we enqueued ourselves. 
if to_delete.len() < 2 { - log::warn!("The task queue is almost full, but no task can be deleted yet."); + tracing::warn!("The task queue is almost full, but no task can be deleted yet."); // the only thing we can do is hope that the user tasks are going to finish return Ok(()); } - log::info!( + tracing::info!( "The task queue is almost full. Deleting the oldest {} finished tasks.", to_delete.len() ); @@ -1386,13 +1418,17 @@ impl IndexScheduler { // increase time by one nanosecond so that the enqueuedAt of the last task to delete is also lower than that date. let delete_before = last_task_to_delete.enqueued_at + Duration::from_nanos(1); - self.register(KindWithContent::TaskDeletion { - query: format!( - "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", - delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, - ), - tasks: to_delete, - })?; + self.register( + KindWithContent::TaskDeletion { + query: format!( + "?beforeEnqueuedAt={}&statuses=succeeded,failed,canceled", + delete_before.format(&Rfc3339).map_err(|_| Error::CorruptedTaskQueue)?, + ), + tasks: to_delete, + }, + None, + false, + )?; Ok(()) } @@ -1513,8 +1549,8 @@ impl<'a> Dump<'a> { ) -> Result { let content_uuid = match content_file { Some(content_file) if task.status == Status::Enqueued => { - let (uuid, mut file) = self.index_scheduler.create_update_file()?; - let mut builder = DocumentsBatchBuilder::new(file.as_file_mut()); + let (uuid, mut file) = self.index_scheduler.create_update_file(false)?; + let mut builder = DocumentsBatchBuilder::new(&mut file); for doc in content_file { builder.append_json_object(&doc?)?; } @@ -1698,7 +1734,7 @@ pub struct IndexStats { #[cfg(test)] mod tests { - use std::io::{BufWriter, Seek, Write}; + use std::io::{BufWriter, Write}; use std::time::Instant; use big_s::S; @@ -1770,6 +1806,7 @@ mod tests { index_count: 5, indexer_config, autobatching_enabled: true, + cleanup_enabled: true, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: usize::MAX, instance_features: Default::default(), @@ -1845,7 +1882,7 @@ mod tests { /// Adapting to the new json reading interface pub fn read_json( bytes: &[u8], - write: impl Write + Seek, + write: impl Write, ) -> std::result::Result { let temp_file = NamedTempFile::new().unwrap(); let mut buffer = BufWriter::new(temp_file.reopen().unwrap()); @@ -1872,7 +1909,7 @@ mod tests { ); let (_uuid, mut file) = index_scheduler.create_update_file_with_uuid(file_uuid).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); (file, documents_count) } @@ -2016,7 +2053,7 @@ mod tests { for (idx, kind) in kinds.into_iter().enumerate() { let k = kind.as_kind(); - let task = index_scheduler.register(kind).unwrap(); + let task = index_scheduler.register(kind, None, false).unwrap(); index_scheduler.assert_internally_consistent(); assert_eq!(task.uid, idx as u32); @@ -2031,18 +2068,18 @@ mod tests { fn insert_task_while_another_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id")).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated]); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); // while the task is processing can we 
register another task? - index_scheduler.register(index_creation_task("index_b", "id")).unwrap(); + index_scheduler.register(index_creation_task("index_b", "id"), None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }) + .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); } @@ -2051,7 +2088,7 @@ mod tests { fn test_task_is_processing() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); - index_scheduler.register(index_creation_task("index_a", "id")).unwrap(); + index_scheduler.register(index_creation_task("index_a", "id"), None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_a_task"); handle.advance_till([Start, BatchCreated]); @@ -2065,17 +2102,25 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2094,22 +2139,26 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); index_scheduler - .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); @@ -2142,7 +2191,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2151,10 +2200,14 @@ mod tests { 
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); index_scheduler - .register(KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1]), - }) + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1]), + }, + None, + false, + ) .unwrap(); // again, no progress made at all, but one more task is registered snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); @@ -2188,7 +2241,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2199,10 +2252,14 @@ mod tests { // Now we delete the first task index_scheduler - .register(KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); @@ -2225,7 +2282,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); @@ -2237,10 +2294,14 @@ mod tests { // Now we delete the first task multiple times in a row for _ in 0..2 { index_scheduler - .register(KindWithContent::TaskDeletion { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2260,17 +2321,21 @@ mod tests { }"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); @@ -2292,27 +2357,35 @@ mod tests { }"#; index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut 
file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); @@ -2333,24 +2406,32 @@ mod tests { ]"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); index_scheduler - .register(KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }) + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); @@ -2373,10 +2454,14 @@ mod tests { fn document_deletion_and_document_addition() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); index_scheduler - .register(KindWithContent::DocumentDeletion { - index_uid: S("doggos"), - documents_ids: vec![S("1"), S("2")], - }) + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2387,17 +2472,21 @@ mod tests { ]"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: 
"registered_the_second_task"); @@ -2428,17 +2517,25 @@ mod tests { for name in index_names { index_scheduler - .register(KindWithContent::IndexCreation { - index_uid: name.to_string(), - primary_key: None, - }) + .register( + KindWithContent::IndexCreation { + index_uid: name.to_string(), + primary_key: None, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } for name in index_names { index_scheduler - .register(KindWithContent::DocumentClear { index_uid: name.to_string() }) + .register( + KindWithContent::DocumentClear { index_uid: name.to_string() }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2463,7 +2560,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2477,18 +2574,26 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_d"); index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, - ], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, + ], + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); @@ -2498,7 +2603,9 @@ mod tests { handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); - index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }).unwrap(); + index_scheduler + .register(KindWithContent::IndexSwap { swaps: vec![] }, None, false) + .unwrap(); handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); } @@ -2515,7 +2622,7 @@ mod tests { ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_n_successful_batches(4); @@ -2525,12 +2632,16 @@ mod tests { snapshot!(first_snap, name: "initial_tasks_processed"); let err = index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, - ], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, + ], + }, + None, + false, + ) .unwrap_err(); snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. 
`a`, `b` were specified several times."); @@ -2539,13 +2650,17 @@ mod tests { // Index `e` does not exist, but we don't check its existence yet index_scheduler - .register(KindWithContent::IndexSwap { - swaps: vec![ - IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, - IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, - IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, - ], - }) + .register( + KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, + IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, + ], + }, + None, + false, + ) .unwrap(); handle.advance_one_failed_batch(); // Now the first swap should have an error message saying `e` and `f` do not exist @@ -2563,20 +2678,24 @@ mod tests { }"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler - .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }, None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler)); @@ -2601,7 +2720,7 @@ mod tests { }, ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2618,7 +2737,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0)) + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2626,10 +2745,14 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed"); index_scheduler - .register(KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) .unwrap(); handle.advance_one_successful_batch(); @@ -2644,7 +2767,7 @@ mod tests { file0.persist().unwrap(); let _ = index_scheduler - .register(replace_document_import_task("catto", None, 0, documents_count0)) + .register(replace_document_import_task("catto", None, 0, documents_count0), None, false) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -2652,10 +2775,14 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing"); index_scheduler - .register(KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0]), - }) + .register( + KindWithContent::TaskCancelation { + query: 
"test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_task_registered"); @@ -2685,7 +2812,7 @@ mod tests { replace_document_import_task("wolfo", None, 2, documents_count2), ]; for task in to_enqueue { - let _ = index_scheduler.register(task).unwrap(); + let _ = index_scheduler.register(task, None, false).unwrap(); index_scheduler.assert_internally_consistent(); } handle.advance_one_successful_batch(); @@ -2693,10 +2820,14 @@ mod tests { handle.advance_till([Start, BatchCreated, InsideProcessBatch]); index_scheduler - .register(KindWithContent::TaskCancelation { - query: "test_query".to_owned(), - tasks: RoaringBitmap::from_iter([0, 1, 2]), - }) + .register( + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1, 2]), + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); @@ -2721,17 +2852,21 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2768,17 +2903,21 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2817,17 +2956,21 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2867,17 +3010,21 @@ mod tests { ); let 
(uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2918,17 +3065,21 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: UpdateDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -2973,13 +3124,13 @@ mod tests { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("doggo", "bone"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); let kind = index_creation_task("whalo", "plankton"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); let kind = index_creation_task("catto", "his_own_vomit"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); handle.advance_n_successful_batches(3); @@ -3037,11 +3188,11 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3260,17 +3411,17 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, 
false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], }; - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3346,20 +3497,20 @@ mod tests { IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); let kind = index_creation_task("catto", "mouse"); - let _ = index_scheduler.register(kind).unwrap(); + let _ = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _ = index_scheduler.register(kind).unwrap(); + let _ = index_scheduler.register(kind, None, false).unwrap(); let kind = KindWithContent::IndexSwap { swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], }; - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); handle.advance_n_successful_batches(1); let kind = KindWithContent::TaskCancelation { query: "test_query".to_string(), tasks: [0, 1, 2, 3].into_iter().collect(), }; - let task_cancelation = index_scheduler.register(kind).unwrap(); + let task_cancelation = index_scheduler.register(kind, None, false).unwrap(); handle.advance_n_successful_batches(1); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); @@ -3394,7 +3545,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); handle.advance_one_failed_batch(); @@ -3416,17 +3567,21 @@ mod tests { }"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated]); @@ -3454,17 +3609,21 @@ mod tests { }"#; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: 
ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); @@ -3510,17 +3669,21 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3558,17 +3721,21 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3596,7 +3763,11 @@ mod tests { // Create the index. index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_one_successful_batch(); @@ -3612,17 +3783,21 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3655,7 +3830,11 @@ mod tests { // Create the index. 
index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_one_successful_batch(); @@ -3671,17 +3850,21 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: false, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: false, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3718,7 +3901,11 @@ mod tests { // Create the index. index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }, + None, + false, + ) .unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_one_successful_batch(); @@ -3735,17 +3922,21 @@ mod tests { let allow_index_creation = i % 2 != 0; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3788,17 +3979,21 @@ mod tests { let allow_index_creation = i % 2 != 0; let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S("id")), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3838,19 +4033,23 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + 
let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3899,19 +4098,23 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -3956,19 +4159,23 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: Some(S(primary_key)), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S(primary_key)), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -4037,19 +4244,23 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -4120,19 +4331,23 @@ mod tests { ); let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(id as u128).unwrap(); - let documents_count = 
read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); assert_eq!(documents_count, 1); file.persist().unwrap(); index_scheduler - .register(KindWithContent::DocumentAdditionOrUpdate { - index_uid: S("doggos"), - primary_key: primary_key.map(|pk| pk.to_string()), - method: ReplaceDocuments, - content_file: uuid, - documents_count, - allow_index_creation: true, - }) + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: primary_key.map(|pk| pk.to_string()), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) .unwrap(); index_scheduler.assert_internally_consistent(); } @@ -4186,7 +4401,7 @@ mod tests { let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]); @@ -4206,15 +4421,20 @@ mod tests { }); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) .unwrap(); handle.advance_one_successful_batch(); // on average this task takes ~600 bytes loop { - let result = index_scheduler.register(KindWithContent::IndexCreation { - index_uid: S("doggo"), - primary_key: None, - }); + let result = index_scheduler.register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ); if result.is_err() { break; } @@ -4224,7 +4444,11 @@ mod tests { // at this point the task DB shoud have reached its limit and we should not be able to register new tasks let result = index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. Please delete tasks to continue performing write operations."); // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code @@ -4232,10 +4456,11 @@ mod tests { // Even the task deletion that doesn't delete anything shouldn't be accepted let result = index_scheduler - .register(KindWithContent::TaskDeletion { - query: S("test"), - tasks: RoaringBitmap::new(), - }) + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: RoaringBitmap::new() }, + None, + false, + ) .unwrap_err(); snapshot!(result, @"Meilisearch cannot receive write operations because the limit of the task database has been reached. 
Please delete tasks to continue performing write operations."); // we won't be able to test this error in an integration test thus as a best effort test I still ensure the error return the expected error code @@ -4243,13 +4468,21 @@ mod tests { // But a task deletion that delete something should works index_scheduler - .register(KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }) + .register( + KindWithContent::TaskDeletion { query: S("test"), tasks: (0..100).collect() }, + None, + false, + ) .unwrap(); handle.advance_one_successful_batch(); // Now we should be able to enqueue a few tasks again index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) .unwrap(); handle.advance_one_failed_batch(); } @@ -4262,22 +4495,38 @@ mod tests { }); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) .unwrap(); handle.advance_one_successful_batch(); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) .unwrap(); handle.advance_one_failed_batch(); // at this point the max number of tasks is reached // we can still enqueue multiple tasks index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) .unwrap(); index_scheduler - .register(KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }) + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) .unwrap(); let rtxn = index_scheduler.env.read_txn().unwrap(); @@ -4320,16 +4569,75 @@ mod tests { drop(rtxn); } + #[test] + fn test_disable_auto_deletion_of_tasks() { + let (index_scheduler, mut handle) = + IndexScheduler::test_with_custom_config(vec![], |config| { + config.cleanup_enabled = false; + config.max_number_of_tasks = 2; + }); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_successful_batch(); + + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + handle.advance_one_failed_batch(); + + // at this point the max number of tasks is reached + // we can still enqueue multiple tasks + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }, + None, + false, + ) + .unwrap(); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); + let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]" }), name: "task_queue_is_full"); + drop(rtxn); + + // now we're above the max number of tasks + // and if we try 
to advance in the tick function no new task deletion should be enqueued + handle.advance_till([Start, BatchCreated]); + let rtxn = index_scheduler.env.read_txn().unwrap(); + let tasks = index_scheduler.get_task_ids(&rtxn, &Query { ..Default::default() }).unwrap(); + let tasks = index_scheduler.get_existing_tasks(&rtxn, tasks).unwrap(); + snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "task_deletion_have_not_been_enqueued"); + drop(rtxn); + } + #[test] fn basic_get_stats() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); let kind = index_creation_task("catto", "mouse"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("doggo", "sheep"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); let kind = index_creation_task("whalo", "fish"); - let _task = index_scheduler.register(kind).unwrap(); + let _task = index_scheduler.register(kind, None, false).unwrap(); snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###" { @@ -4479,11 +4787,11 @@ mod tests { query: "cancel dump".to_owned(), tasks: RoaringBitmap::from_iter([0]), }; - let _ = index_scheduler.register(dump_creation).unwrap(); + let _ = index_scheduler.register(dump_creation, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register"); handle.advance_till([Start, BatchCreated, InsideProcessBatch]); - let _ = index_scheduler.register(dump_cancellation).unwrap(); + let _ = index_scheduler.register(dump_cancellation, None, false).unwrap(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered"); snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation"); @@ -4491,4 +4799,92 @@ mod tests { handle.advance_one_successful_batch(); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); } + + #[test] + fn basic_set_taskid() { + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None, false).unwrap(); + snapshot!(task.uid, @"0"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12), false).unwrap(); + snapshot!(task.uid, @"12"); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let error = index_scheduler.register(kind, Some(5), false).unwrap_err(); + snapshot!(error, @"Received bad task id: 5 should be >= to 13."); + } + + #[test] + fn dry_run() { + let (index_scheduler, _handle) = IndexScheduler::test(true, vec![]); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, None, true).unwrap(); + snapshot!(task.uid, @"0"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r###" + ### Autobatching Enabled = true + ### Processing Tasks: + [] + ---------------------------------------------------------------------- + ### All Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + 
---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "###); + + let kind = KindWithContent::IndexCreation { index_uid: S("doggo"), primary_key: None }; + let task = index_scheduler.register(kind, Some(12), true).unwrap(); + snapshot!(task.uid, @"12"); + snapshot!(snapshot_index_scheduler(&index_scheduler), @r###" + ### Autobatching Enabled = true + ### Processing Tasks: + [] + ---------------------------------------------------------------------- + ### All Tasks: + ---------------------------------------------------------------------- + ### Status: + ---------------------------------------------------------------------- + ### Kind: + ---------------------------------------------------------------------- + ### Index Tasks: + ---------------------------------------------------------------------- + ### Index Mapper: + + ---------------------------------------------------------------------- + ### Canceled By: + + ---------------------------------------------------------------------- + ### Enqueued At: + ---------------------------------------------------------------------- + ### Started At: + ---------------------------------------------------------------------- + ### Finished At: + ---------------------------------------------------------------------- + ### File Store: + + ---------------------------------------------------------------------- + "###); + } } diff --git a/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap new file mode 100644 index 000000000..988df76ec --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap @@ -0,0 +1,90 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "uid": 0, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "succeeded", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 1, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": { + "message": "Index `doggo` already exists.", + "code": "index_already_exists", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_already_exists" + }, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "failed", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 2, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": 
"enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 3, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap new file mode 100644 index 000000000..988df76ec --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap @@ -0,0 +1,90 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "uid": 0, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "succeeded", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 1, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": { + "message": "Index `doggo` already exists.", + "code": "index_already_exists", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_already_exists" + }, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "failed", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 2, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + }, + { + "uid": 3, + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]", + "error": null, + "canceledBy": null, + "details": { + "IndexInfo": { + "primary_key": null + } + }, + "status": "enqueued", + "kind": { + "indexCreation": { + "index_uid": "doggo", + "primary_key": null + } + } + } +] diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index f5bfaa036..b9edb4c1e 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -11,7 +11,7 @@ edition.workspace = true license.workspace = true [dependencies] -actix-web = { version = "4.4.1", default-features = false } +actix-web = { version = "4.5.1", default-features = false } anyhow = "1.0.79" convert_case = "0.6.0" csv = "1.3.0" diff --git a/meilisearch-types/src/document_formats.rs b/meilisearch-types/src/document_formats.rs index 0f1d995f9..50dc5bad4 100644 --- a/meilisearch-types/src/document_formats.rs +++ b/meilisearch-types/src/document_formats.rs @@ -1,6 +1,6 @@ use std::fmt::{self, Debug, Display}; use std::fs::File; -use std::io::{self, Seek, Write}; +use std::io::{self, BufWriter, Write}; use std::marker::PhantomData; use memmap2::MmapOptions; @@ -104,8 +104,8 @@ impl ErrorCode for DocumentFormatError { } /// Reads CSV from input and write an obkv batch to writer. 
-pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result { - let mut builder = DocumentsBatchBuilder::new(writer); +pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result { + let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer)); let mmap = unsafe { MmapOptions::new().map(file)? }; let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref()); builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?; @@ -116,9 +116,9 @@ pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result Ok(count as u64) } -/// Reads JSON from temporary file and write an obkv batch to writer. -pub fn read_json(file: &File, writer: impl Write + Seek) -> Result { - let mut builder = DocumentsBatchBuilder::new(writer); +/// Reads JSON from temporary file and write an obkv batch to writer. +pub fn read_json(file: &File, writer: impl Write) -> Result { + let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer)); let mmap = unsafe { MmapOptions::new().map(file)? }; let mut deserializer = serde_json::Deserializer::from_slice(&mmap); @@ -151,8 +151,8 @@ pub fn read_json(file: &File, writer: impl Write + Seek) -> Result { } /// Reads JSON from temporary file and write an obkv batch to writer. -pub fn read_ndjson(file: &File, writer: impl Write + Seek) -> Result { - let mut builder = DocumentsBatchBuilder::new(writer); +pub fn read_ndjson(file: &File, writer: impl Write) -> Result { + let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer)); let mmap = unsafe { MmapOptions::new().map(file)? }; for result in serde_json::Deserializer::from_slice(&mmap).into_iter() { diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 2182b1836..965d2e672 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -310,6 +310,8 @@ TooManyVectors , InvalidRequest , BAD_REQUEST ; UnretrievableDocument , Internal , BAD_REQUEST ; UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ; UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; + +// Experimental features VectorEmbeddingError , InvalidRequest , BAD_REQUEST } @@ -347,6 +349,9 @@ impl ErrorCode for milli::Error { UserError::InvalidFieldForSource { .. } | UserError::MissingFieldForSource { .. } | UserError::InvalidOpenAiModel { .. } + | UserError::InvalidOpenAiModelDimensions { .. } + | UserError::InvalidOpenAiModelDimensionsMax { .. } + | UserError::InvalidSettingsDimensions { .. } | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) 
=> Code::InvalidSettingsEmbedders, diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index 33afe2d24..04a5d9d6f 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -3,13 +3,14 @@ use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Clone, Copy, Default, PartialEq, Eq)] #[serde(rename_all = "camelCase", default)] pub struct RuntimeTogglableFeatures { - pub score_details: bool, pub vector_store: bool, pub metrics: bool, + pub logs_route: bool, pub export_puffin_reports: bool, } #[derive(Default, Debug, Clone, Copy)] pub struct InstanceTogglableFeatures { pub metrics: bool, + pub logs_route: bool, } diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 1d7f53229..f8a50238a 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -14,18 +14,18 @@ default-run = "meilisearch" [dependencies] actix-cors = "0.7.0" -actix-http = { version = "3.5.1", default-features = false, features = [ +actix-http = { version = "3.6.0", default-features = false, features = [ "compress-brotli", "compress-gzip", - "rustls", + "rustls-0_21", ] } actix-utils = "3.0.1" -actix-web = { version = "4.4.1", default-features = false, features = [ +actix-web = { version = "4.5.1", default-features = false, features = [ "macros", "compress-brotli", "compress-gzip", "cookies", - "rustls", + "rustls-0_21", ] } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } anyhow = { version = "1.0.79", features = ["backtrace"] } @@ -42,7 +42,6 @@ crossbeam-channel = "0.5.11" deserr = { version = "0.6.1", features = ["actix-web"] } dump = { path = "../dump" } either = "1.9.0" -env_logger = "0.10.1" file-store = { path = "../file-store" } flate2 = "1.0.28" fst = "0.4.7" @@ -53,9 +52,8 @@ index-scheduler = { path = "../index-scheduler" } indexmap = { version = "2.1.0", features = ["serde"] } is-terminal = "0.4.10" itertools = "0.11.0" -jsonwebtoken = "8.3.0" +jsonwebtoken = "9.2.0" lazy_static = "1.4.0" -log = "0.4.20" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } mimalloc = { version = "0.1.39", default-features = false } @@ -77,7 +75,7 @@ reqwest = { version = "0.11.23", features = [ "rustls-tls", "json", ], default-features = false } -rustls = "0.20.8" +rustls = "0.21.6" rustls-pemfile = "1.0.2" segment = { version = "0.2.3", optional = true } serde = { version = "1.0.195", features = ["derive"] } @@ -105,6 +103,10 @@ yaup = "0.2.1" serde_urlencoded = "0.7.1" termcolor = "1.4.1" url = { version = "2.5.0", features = ["serde"] } +tracing = "0.1.40" +tracing-subscriber = "0.3.18" +tracing-trace = { version = "0.1.0", path = "../tracing-trace" } +tracing-actix-web = "0.7.9" [dev-dependencies] actix-rt = "2.9.0" diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 6969c9b3d..8bb7e8d81 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -250,6 +250,8 @@ impl super::Analytics for SegmentAnalytics { struct Infos { env: String, experimental_enable_metrics: bool, + experimental_replication_parameters: bool, + experimental_enable_logs_route: bool, experimental_reduce_indexing_memory_usage: bool, experimental_max_number_of_batched_tasks: usize, db_path: bool, @@ -287,6 +289,8 @@ impl From for Infos { let Opt { db_path, experimental_enable_metrics, + 
experimental_replication_parameters, + experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, experimental_max_number_of_batched_tasks, http_addr, @@ -333,6 +337,8 @@ impl From for Infos { Self { env, experimental_enable_metrics, + experimental_replication_parameters, + experimental_enable_logs_route, experimental_reduce_indexing_memory_usage, db_path: db_path != PathBuf::from("./data.ms"), import_dump: import_dump.is_some(), diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 3bd8f3edd..a8351fd1f 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -12,6 +12,8 @@ pub enum MeilisearchHttpError { #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}", .0.iter().map(|s| format!("`{}`", s)).collect::>().join(", "))] MissingContentType(Vec), + #[error("The `/logs/stream` route is currently in use by someone else.")] + AlreadyUsedLogRoute, #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] CsvDelimiterWithWrongContentType(String), #[error( @@ -59,6 +61,7 @@ impl ErrorCode for MeilisearchHttpError { fn error_code(&self) -> Code { match self { MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, + MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest, MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType, MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, diff --git a/meilisearch/src/extractors/sequential_extractor.rs b/meilisearch/src/extractors/sequential_extractor.rs index c04210616..23d6cb997 100644 --- a/meilisearch/src/extractors/sequential_extractor.rs +++ b/meilisearch/src/extractors/sequential_extractor.rs @@ -131,6 +131,7 @@ gen_seq! { SeqFromRequestFut3; A B C } gen_seq! { SeqFromRequestFut4; A B C D } gen_seq! { SeqFromRequestFut5; A B C D E } gen_seq! { SeqFromRequestFut6; A B C D E F } +gen_seq! { SeqFromRequestFut7; A B C D E F G } pin_project! { #[project = ExtractProj] diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index f1111962c..1ab161564 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -29,7 +29,6 @@ use error::PayloadError; use extractors::payload::PayloadConfig; use http::header::CONTENT_TYPE; use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; -use log::error; use meilisearch_auth::AuthController; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; @@ -39,6 +38,8 @@ use meilisearch_types::versioning::{check_version_file, create_version_file}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; pub use option::Opt; use option::ScheduleSnapshot; +use tracing::{error, info_span}; +use tracing_subscriber::filter::Targets; use crate::error::MeilisearchHttpError; @@ -86,10 +87,21 @@ fn is_empty_db(db_path: impl AsRef) -> bool { } } +/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`. 
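(Editor's annotation, not part of the patch.) The `LogRouteHandle`/`LogRouteType` aliases defined just below wrap a `tracing-subscriber` reload handle around an optional boxed layer plus a `Targets` filter: the new `/logs/stream` route swaps a concrete layer in through the handle while a client is streaming and clears it again when the stream is dropped or cancelled. As a minimal, self-contained sketch of that reload mechanism — plain `tracing`/`tracing-subscriber` only, no Meilisearch types — a filter can be installed once and then changed at runtime through the handle:

```rust
use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, reload};

fn main() {
    // Build a reloadable level filter and keep the handle that can mutate it later.
    let (filter, handle) = reload::Layer::new(LevelFilter::INFO);

    // Install the reloadable filter in front of a plain formatting layer.
    tracing_subscriber::registry().with(filter).with(fmt::layer()).init();

    tracing::info!("emitted: INFO is enabled");
    tracing::trace!("dropped: TRACE is currently filtered out");

    // Elsewhere in the program (e.g. an HTTP handler), the handle can change
    // what is captured without rebuilding or restarting the subscriber.
    handle.modify(|filter| *filter = LevelFilter::TRACE).unwrap();

    tracing::trace!("emitted: the reload handle switched the filter to TRACE");
}
```

The patch applies the same idea with a boxed `dyn Layer` as the reloadable piece, so an entire formatting or profiling layer, not just a level filter, can be attached and detached per logs request.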
+pub type LogRouteHandle = + tracing_subscriber::reload::Handle; + +pub type LogRouteType = tracing_subscriber::filter::Filtered< + Option + Send + Sync>>, + Targets, + tracing_subscriber::Registry, +>; + pub fn create_app( index_scheduler: Data, auth_controller: Data, opt: Opt, + logs: LogRouteHandle, analytics: Arc, enable_dashboard: bool, ) -> actix_web::App< @@ -108,6 +120,7 @@ pub fn create_app( index_scheduler.clone(), auth_controller.clone(), &opt, + logs, analytics.clone(), ) }) @@ -123,11 +136,49 @@ pub fn create_app( .allow_any_method() .max_age(86_400), // 24h ) - .wrap(actix_web::middleware::Logger::default()) + .wrap(tracing_actix_web::TracingLogger::::new()) .wrap(actix_web::middleware::Compress::default()) .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim)) } +struct AwebTracingLogger; + +impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger { + fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span { + use tracing::field::Empty; + + let conn_info = request.connection_info(); + let headers = request.headers(); + let user_agent = headers + .get(http::header::USER_AGENT) + .map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned()) + .unwrap_or_default(); + info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty) + } + + fn on_request_end( + span: tracing::Span, + outcome: &Result, actix_web::Error>, + ) { + match &outcome { + Ok(response) => { + let code: i32 = response.response().status().as_u16().into(); + span.record("status_code", code); + + if let Some(error) = response.response().error() { + // use the status code already constructed for the outgoing HTTP response + span.record("error", &tracing::field::display(error.as_response_error())); + } + } + Err(error) => { + let code: i32 = error.error_response().status().as_u16().into(); + span.record("status_code", code); + span.record("error", &tracing::field::display(error.as_response_error())); + } + }; + } +} + enum OnFailure { RemoveDb, KeepDb, @@ -200,7 +251,9 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, Arc< .name(String::from("register-snapshot-tasks")) .spawn(move || loop { thread::sleep(snapshot_delay); - if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) { + if let Err(e) = + index_scheduler.register(KindWithContent::SnapshotCreation, None, false) + { error!("Error while registering snapshot: {}", e); } }) @@ -235,6 +288,7 @@ fn open_or_create_database_unchecked( enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage, indexer_config: (&opt.indexer_options).try_into()?, autobatching_enabled: true, + cleanup_enabled: !opt.experimental_replication_parameters, max_number_of_tasks: 1_000_000, max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks, index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize, @@ -280,15 +334,15 @@ fn import_dump( let mut dump_reader = dump::DumpReader::open(reader)?; if let Some(date) = dump_reader.date() { - log::info!( - "Importing a dump of meilisearch `{:?}` from the {}", - dump_reader.version(), // TODO: get the meilisearch version instead of the dump version - date + tracing::info!( + version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + %date, + "Importing a dump of meilisearch" ); } else { - log::info!( - "Importing a 
dump of meilisearch `{:?}`", - dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + tracing::info!( + version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + "Importing a dump of meilisearch", ); } @@ -322,7 +376,7 @@ fn import_dump( for index_reader in dump_reader.indexes()? { let mut index_reader = index_reader?; let metadata = index_reader.metadata(); - log::info!("Importing index `{}`.", metadata.uid); + tracing::info!("Importing index `{}`.", metadata.uid); let date = Some((metadata.created_at, metadata.updated_at)); let index = index_scheduler.create_raw_index(&metadata.uid, date)?; @@ -336,14 +390,15 @@ fn import_dump( } // 4.2 Import the settings. - log::info!("Importing the settings."); + tracing::info!("Importing the settings."); let settings = index_reader.settings()?; apply_settings_to_builder(&settings, &mut builder); - builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?; + builder + .execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?; // 4.3 Import the documents. // 4.3.1 We need to recreate the grenad+obkv format accepted by the index. - log::info!("Importing the documents."); + tracing::info!("Importing the documents."); let file = tempfile::tempfile()?; let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); for document in index_reader.documents()? { @@ -365,15 +420,16 @@ fn import_dump( update_method: IndexDocumentsMethod::ReplaceDocuments, ..Default::default() }, - |indexing_step| log::trace!("update: {:?}", indexing_step), + |indexing_step| tracing::trace!("update: {:?}", indexing_step), || false, )?; let (builder, user_result) = builder.add_documents(reader)?; - log::info!("{} documents found.", user_result?); + let user_result = user_result?; + tracing::info!(documents_found = user_result, "{} documents found.", user_result); builder.execute()?; wtxn.commit()?; - log::info!("All documents successfully imported."); + tracing::info!("All documents successfully imported."); } let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; @@ -391,6 +447,7 @@ pub fn configure_data( index_scheduler: Data, auth: Data, opt: &Opt, + logs: LogRouteHandle, analytics: Arc, ) { let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; @@ -398,6 +455,8 @@ pub fn configure_data( .app_data(index_scheduler) .app_data(auth) .app_data(web::Data::from(analytics)) + .app_data(web::Data::new(logs)) + .app_data(web::Data::new(opt.clone())) .app_data( web::JsonConfig::default() .limit(http_payload_size_limit) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index ddd37bbb6..1e067b43e 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -1,6 +1,7 @@ use std::env; use std::io::{stderr, Write}; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; use actix_web::http::KeepAlive; @@ -9,37 +10,60 @@ use actix_web::HttpServer; use index_scheduler::IndexScheduler; use is_terminal::IsTerminal; use meilisearch::analytics::Analytics; -use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt}; +use meilisearch::{ + analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType, Opt, +}; use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; +use mimalloc::MiMalloc; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; +use tracing::level_filters::LevelFilter; +use 
tracing_subscriber::layer::SubscriberExt as _; +use tracing_subscriber::Layer; #[global_allocator] -static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; +static ALLOC: MiMalloc = MiMalloc; + +fn default_layer() -> LogRouteType { + None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF)) +} /// does all the setup before meilisearch is launched -fn setup(opt: &Opt) -> anyhow::Result<()> { - let mut log_builder = env_logger::Builder::new(); - let log_filters = format!( - "{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn", - opt.log_level +fn setup(opt: &Opt) -> anyhow::Result { + let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer()); + let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer; + + let subscriber = tracing_subscriber::registry().with(route_layer).with( + tracing_subscriber::fmt::layer() + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE) + .with_filter( + tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string()) + .unwrap(), + ), ); - log_builder.parse_filters(&log_filters); - log_builder.init(); + // set the subscriber as the default for the application + tracing::subscriber::set_global_default(subscriber).unwrap(); - Ok(()) + Ok(route_layer_handle) +} + +fn on_panic(info: &std::panic::PanicInfo) { + let info = info.to_string().replace('\n', " "); + tracing::error!(%info); } #[actix_web::main] async fn main() -> anyhow::Result<()> { let (opt, config_read_from) = Opt::try_build()?; + std::panic::set_hook(Box::new(on_panic)); + anyhow::ensure!( !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" ); - setup(&opt)?; + let log_handle = setup(&opt)?; match (opt.env.as_ref(), &opt.master_key) { ("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => { @@ -77,7 +101,7 @@ async fn main() -> anyhow::Result<()> { print_launch_resume(&opt, analytics.clone(), config_read_from); - run_http(index_scheduler, auth_controller, opt, analytics).await?; + run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?; Ok(()) } @@ -86,6 +110,7 @@ async fn run_http( index_scheduler: Arc, auth_controller: Arc, opt: Opt, + logs: LogRouteHandle, analytics: Arc, ) -> anyhow::Result<()> { let enable_dashboard = &opt.env == "development"; @@ -98,6 +123,7 @@ async fn run_http( index_scheduler.clone(), auth_controller.clone(), opt.clone(), + logs.clone(), analytics.clone(), enable_dashboard, ) @@ -107,7 +133,7 @@ async fn run_http( .keep_alive(KeepAlive::Os); if let Some(config) = opt_clone.get_ssl_config()? 
{ - http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?; + http_server.bind_rustls_021(opt_clone.http_addr, config)?.run().await?; } else { http_server.bind(&opt_clone.http_addr)?.run().await?; } diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 3cff14aa0..657be00d0 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -51,6 +51,8 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; +const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS"; +const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; @@ -309,6 +311,23 @@ pub struct Opt { #[serde(default)] pub experimental_enable_metrics: bool, + /// Experimental logs route feature. For more information, see: + /// + /// Enables the log route on the `POST /logs/stream` endpoint and the `DELETE /logs/stream` to stop receiving logs. + #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)] + #[serde(default)] + pub experimental_enable_logs_route: bool, + + /// Enable multiple features that helps you to run meilisearch in a replicated context. + /// For more information, see: + /// + /// - /!\ Disable the automatic clean up of old processed tasks, you're in charge of that now + /// - Lets you specify a custom task ID upon registering a task + /// - Lets you execute dry-register a task (get an answer from the route but nothing is actually registered in meilisearch and it won't be processed) + #[clap(long, env = MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS)] + #[serde(default)] + pub experimental_replication_parameters: bool, + /// Experimental RAM reduction during indexing, do not use in production, see: #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[serde(default)] @@ -414,6 +433,8 @@ impl Opt { #[cfg(feature = "analytics")] no_analytics, experimental_enable_metrics, + experimental_enable_logs_route, + experimental_replication_parameters, experimental_reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); @@ -470,6 +491,14 @@ impl Opt { MEILI_EXPERIMENTAL_ENABLE_METRICS, experimental_enable_metrics.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS, + experimental_replication_parameters.to_string(), + ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE, + experimental_enable_logs_route.to_string(), + ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, experimental_reduce_indexing_memory_usage.to_string(), @@ -490,11 +519,11 @@ impl Opt { } if self.ssl_require_auth { let verifier = AllowAnyAuthenticatedClient::new(client_auth_roots); - config.with_client_cert_verifier(verifier) + config.with_client_cert_verifier(Arc::from(verifier)) } else { let verifier = AllowAnyAnonymousOrAuthenticatedClient::new(client_auth_roots); - config.with_client_cert_verifier(verifier) + config.with_client_cert_verifier(Arc::from(verifier)) } } None => config.with_no_client_auth(), @@ -524,7 +553,10 @@ impl Opt { } pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures { - 
InstanceTogglableFeatures { metrics: self.experimental_enable_metrics }
+        InstanceTogglableFeatures {
+            metrics: self.experimental_enable_metrics,
+            logs_route: self.experimental_enable_logs_route,
+        }
     }
 }
diff --git a/meilisearch/src/routes/dump.rs b/meilisearch/src/routes/dump.rs
index 0aabd2aa6..7f3cd06a5 100644
--- a/meilisearch/src/routes/dump.rs
+++ b/meilisearch/src/routes/dump.rs
@@ -1,17 +1,18 @@
 use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::IndexScheduler;
-use log::debug;
 use meilisearch_auth::AuthController;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::tasks::KindWithContent;
 use serde_json::json;
+use tracing::debug;
 
 use crate::analytics::Analytics;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
-use crate::routes::SummarizedTaskView;
+use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
+use crate::Opt;
 
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
@@ -21,6 +22,7 @@ pub async fn create_dump(
     index_scheduler: GuardedData, Data>,
     auth_controller: GuardedData, Data>,
     req: HttpRequest,
+    opt: web::Data,
     analytics: web::Data,
 ) -> Result {
     analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
@@ -29,9 +31,13 @@ pub async fn create_dump(
         keys: auth_controller.list_keys()?,
         instance_uid: analytics.instance_uid().cloned(),
     };
+    let uid = get_task_id(&req, &opt)?;
+    let dry_run = is_dry_run(&req, &opt)?;
     let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
+            .await??
+ .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Create dump"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index e7fd8de22..227b485c5 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use deserr::Deserr; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::ActionPolicy; @@ -33,20 +33,21 @@ async fn get_features( let features = index_scheduler.features(); analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req)); - debug!("returns: {:?}", features.runtime_features()); - HttpResponse::Ok().json(features.runtime_features()) + let features = features.runtime_features(); + debug!(returns = ?features, "Get features"); + HttpResponse::Ok().json(features) } #[derive(Debug, Deserr)] #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] pub struct RuntimeTogglableFeatures { - #[deserr(default)] - pub score_details: Option, #[deserr(default)] pub vector_store: Option, #[deserr(default)] pub metrics: Option, #[deserr(default)] + pub logs_route: Option, + #[deserr(default)] pub export_puffin_reports: Option, } @@ -60,12 +61,13 @@ async fn patch_features( analytics: Data, ) -> Result { let features = index_scheduler.features(); + debug!(parameters = ?new_features, "Patch features"); let old_features = features.runtime_features(); let new_features = meilisearch_types::features::RuntimeTogglableFeatures { - score_details: new_features.0.score_details.unwrap_or(old_features.score_details), vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), metrics: new_features.0.metrics.unwrap_or(old_features.metrics), + logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route), export_puffin_reports: new_features .0 .export_puffin_reports @@ -76,22 +78,23 @@ async fn patch_features( // the it renames to camelCase, which we don't want for analytics. // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. 
let meilisearch_types::features::RuntimeTogglableFeatures { - score_details, vector_store, metrics, + logs_route, export_puffin_reports, } = new_features; analytics.publish( "Experimental features Updated".to_string(), json!({ - "score_details": score_details, "vector_store": vector_store, "metrics": metrics, + "logs_route": logs_route, "export_puffin_reports": export_puffin_reports, }), Some(&req), ); index_scheduler.put_runtime_features(new_features)?; + debug!(returns = ?new_features, "Patch features"); Ok(HttpResponse::Ok().json(new_features)) } diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 6d59f60dd..43fab1dae 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -7,8 +7,7 @@ use bstr::ByteSlice as _; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; -use index_scheduler::IndexScheduler; -use log::debug; +use index_scheduler::{IndexScheduler, TaskId}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; @@ -28,6 +27,7 @@ use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; +use tracing::debug; use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::error::MeilisearchHttpError; @@ -36,8 +36,11 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; +use crate::routes::{ + get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, +}; use crate::search::parse_filter; +use crate::Opt; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()] @@ -101,6 +104,7 @@ pub async fn get_document( analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = document_param.into_inner(); + debug!(parameters = ?params, "Get document"); let index_uid = IndexUid::try_from(index_uid)?; analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req); @@ -110,7 +114,7 @@ pub async fn get_document( let index = index_scheduler.index(&index_uid)?; let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?; - debug!("returns: {:?}", document); + debug!(returns = ?document, "Get document"); Ok(HttpResponse::Ok().json(document)) } @@ -118,6 +122,7 @@ pub async fn delete_document( index_scheduler: GuardedData, Data>, path: web::Path, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = path.into_inner(); @@ -129,8 +134,12 @@ pub async fn delete_document( index_uid: index_uid.to_string(), documents_ids: vec![document_id], }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } @@ -168,9 +177,8 @@ pub async fn documents_by_query_post( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with body: {:?}", body); - let body = body.into_inner(); + debug!(parameters = ?body, "Get documents POST"); analytics.post_fetch_documents( &DocumentFetchKind::Normal { @@ -191,7 +199,7 @@ pub async fn get_documents( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", params); + debug!(parameters = ?params, "Get documents GET"); let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner(); @@ -235,7 +243,7 @@ fn documents_by_query( let ret = PaginationView::new(offset, limit, total as usize, documents); - debug!("returns: {:?}", ret); + debug!(returns = ?ret, "Get documents"); Ok(HttpResponse::Ok().json(ret)) } @@ -267,16 +275,19 @@ pub async fn replace_documents( params: AwebQueryParameter, body: Payload, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - debug!("called with params: {:?}", params); + debug!(parameters = ?params, "Replace documents"); let params = params.into_inner(); analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -285,9 +296,12 @@ pub async fn replace_documents( params.csv_delimiter, body, IndexDocumentsMethod::ReplaceDocuments, + uid, + dry_run, allow_index_creation, ) .await?; + debug!(returns = ?task, "Replace documents"); Ok(HttpResponse::Accepted().json(task)) } @@ -298,16 +312,19 @@ pub async fn update_documents( params: AwebQueryParameter, body: Payload, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - debug!("called with params: {:?}", params); let params = params.into_inner(); + debug!(parameters = ?params, "Update documents"); analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task = document_addition( extract_mime_type(&req)?, index_scheduler, @@ -316,9 +333,12 @@ pub async fn update_documents( params.csv_delimiter, body, IndexDocumentsMethod::UpdateDocuments, + uid, + dry_run, allow_index_creation, ) .await?; + debug!(returns = ?task, "Update documents"); Ok(HttpResponse::Accepted().json(task)) } @@ -332,6 +352,8 @@ async fn document_addition( csv_delimiter: Option, mut body: Payload, method: IndexDocumentsMethod, + task_id: Option, + dry_run: bool, allow_index_creation: bool, ) -> Result { let format = match ( @@ -364,7 +386,7 @@ async fn document_addition( } }; - let (uuid, mut update_file) = index_scheduler.create_update_file()?; + let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?; let temp_file = match tempfile() { Ok(file) => file, @@ -403,11 +425,9 @@ async fn document_addition( let read_file = buffer.into_inner().into_std().await; let documents_count = tokio::task::spawn_blocking(move || { let documents_count = match format { - PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?, - PayloadType::Csv { delimiter } 
=> { - read_csv(&read_file, update_file.as_file_mut(), delimiter)? - } - PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?, + PayloadType::Json => read_json(&read_file, &mut update_file)?, + PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?, + PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?, }; // we NEED to persist the file here because we moved the `udpate_file` in another task. update_file.persist()?; @@ -427,7 +447,10 @@ async fn document_addition( Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e))) if e.kind() == ErrorKind::NotFound => {} Err(e) => { - log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}"); + tracing::warn!( + index_uuid = %uuid, + "Unknown error happened while deleting a malformed update file: {e}" + ); } } // We still want to return the original error to the end user. @@ -445,7 +468,9 @@ async fn document_addition( }; let scheduler = index_scheduler.clone(); - let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? { + let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run)) + .await? + { Ok(task) => task, Err(e) => { index_scheduler.delete_update_file(uuid)?; @@ -453,7 +478,6 @@ async fn document_addition( } }; - debug!("returns: {:?}", task); Ok(task.into()) } @@ -462,9 +486,10 @@ pub async fn delete_documents_batch( index_uid: web::Path, body: web::Json>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", body); + debug!(parameters = ?body, "Delete documents by batch"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); @@ -476,10 +501,14 @@ pub async fn delete_documents_batch( let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete documents by batch"); Ok(HttpResponse::Accepted().json(task)) } @@ -495,9 +524,10 @@ pub async fn delete_documents_by_filter( index_uid: web::Path, body: AwebJson, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", body); + debug!(parameters = ?body, "Delete documents by filter"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); let filter = body.into_inner().filter; @@ -512,10 +542,14 @@ pub async fn delete_documents_by_filter( .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?; let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete documents by filter"); Ok(HttpResponse::Accepted().json(task)) } @@ -523,16 +557,21 @@ pub async fn clear_all_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete all documents"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 4b5d4d78a..a980fb278 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -2,12 +2,12 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use serde_json::Value; +use tracing::debug; use crate::analytics::{Analytics, FacetSearchAggregator}; use crate::extractors::authentication::policies::*; @@ -56,7 +56,7 @@ pub async fn search( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.into_inner(); - debug!("facet search called with params: {:?}", query); + debug!(parameters = ?query, "Facet search"); let mut aggregate = FacetSearchAggregator::from_query(&query, &req); @@ -83,7 +83,7 @@ pub async fn search( let search_result = search_result?; - debug!("returns: {:?}", search_result); + debug!(returns = ?search_result, "Facet search"); Ok(HttpResponse::Ok().json(search_result)) } diff --git a/meilisearch/src/routes/indexes/mod.rs b/meilisearch/src/routes/indexes/mod.rs index 81b5c3f2e..59fa02dff 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/meilisearch/src/routes/indexes/mod.rs @@ -5,7 +5,6 @@ use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::{DeserializeError, Deserr, ValuePointerRef}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; @@ -16,12 +15,15 @@ use meilisearch_types::tasks::KindWithContent; use serde::Serialize; use serde_json::json; use time::OffsetDateTime; +use tracing::debug; -use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; +use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; +use crate::routes::is_dry_run; +use crate::Opt; pub mod documents; pub mod facet_search; @@ -93,6 +95,7 @@ pub async fn 
list_indexes( index_scheduler: GuardedData, Data>, paginate: AwebQueryParameter, ) -> Result { + debug!(parameters = ?paginate, "List indexes"); let filters = index_scheduler.filters(); let indexes: Vec> = index_scheduler.try_for_each_index(|uid, index| -> Result, _> { @@ -105,7 +108,7 @@ pub async fn list_indexes( let indexes: Vec = indexes.into_iter().flatten().collect(); let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter()); - debug!("returns: {:?}", ret); + debug!(returns = ?ret, "List indexes"); Ok(HttpResponse::Ok().json(ret)) } @@ -122,8 +125,10 @@ pub async fn create_index( index_scheduler: GuardedData, Data>, body: AwebJson, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { + debug!(parameters = ?body, "Create index"); let IndexCreateRequest { primary_key, uid } = body.into_inner(); let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); @@ -135,8 +140,13 @@ pub async fn create_index( ); let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); + debug!(returns = ?task, "Create index"); Ok(HttpResponse::Accepted().json(task)) } else { @@ -177,7 +187,7 @@ pub async fn get_index( let index = index_scheduler.index(&index_uid)?; let index_view = IndexView::new(index_uid.into_inner(), &index)?; - debug!("returns: {:?}", index_view); + debug!(returns = ?index_view, "Get index"); Ok(HttpResponse::Ok().json(index_view)) } @@ -187,9 +197,10 @@ pub async fn update_index( index_uid: web::Path, body: AwebJson, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", body); + debug!(parameters = ?body, "Update index"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); analytics.publish( @@ -203,21 +214,32 @@ pub async fn update_index( primary_key: body.primary_key, }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Update index"); Ok(HttpResponse::Accepted().json(task)) } pub async fn delete_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, + req: HttpRequest, + opt: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); + debug!(returns = ?task, "Delete index"); Ok(HttpResponse::Accepted().json(task)) } @@ -255,6 +277,6 @@ pub async fn get_index_stats( let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?); - debug!("returns: {:?}", stats); + debug!(returns = ?stats, "Get index stats"); Ok(HttpResponse::Ok().json(stats)) } diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index c474d285e..3adfce970 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -2,7 +2,6 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use index_scheduler::IndexScheduler; -use log::{debug, warn}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; @@ -12,6 +11,7 @@ use meilisearch_types::milli; use meilisearch_types::milli::vector::DistributionShift; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; +use tracing::{debug, warn}; use crate::analytics::{Analytics, SearchAggregator}; use crate::extractors::authentication::policies::*; @@ -186,7 +186,7 @@ pub async fn search_with_url_query( req: HttpRequest, analytics: web::Data, ) -> Result { - debug!("called with params: {:?}", params); + debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query: SearchQuery = params.into_inner().into(); @@ -213,7 +213,7 @@ pub async fn search_with_url_query( let search_result = search_result?; - debug!("returns: {:?}", search_result); + debug!(returns = ?search_result, "Search get"); Ok(HttpResponse::Ok().json(search_result)) } @@ -227,7 +227,7 @@ pub async fn search_with_post( let index_uid = IndexUid::try_from(index_uid.into_inner())?; let mut query = params.into_inner(); - debug!("search called with params: {:?}", query); + debug!(parameters = ?query, "Search post"); // Tenant token search_rules. if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { @@ -252,7 +252,7 @@ pub async fn search_with_post( let search_result = search_result?; - debug!("returns: {:?}", search_result); + debug!(returns = ?search_result, "Search post"); Ok(HttpResponse::Ok().json(search_result)) } diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index decc4ffc9..c71d83279 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -2,7 +2,6 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::facet_values_sort::FacetValuesSort; @@ -11,11 +10,13 @@ use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked}; use meilisearch_types::tasks::KindWithContent; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; -use crate::routes::SummarizedTaskView; +use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; +use crate::Opt; #[macro_export] macro_rules! make_setting_route { @@ -24,17 +25,18 @@ macro_rules! 
make_setting_route { use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse, Resource}; use index_scheduler::IndexScheduler; - use log::debug; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::update::Setting; use meilisearch_types::settings::{settings, Settings}; use meilisearch_types::tasks::KindWithContent; + use tracing::debug; use $crate::analytics::Analytics; use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::GuardedData; use $crate::extractors::sequential_extractor::SeqHandler; - use $crate::routes::SummarizedTaskView; + use $crate::Opt; + use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView}; pub async fn delete( index_scheduler: GuardedData< @@ -42,6 +44,8 @@ macro_rules! make_setting_route { Data, >, index_uid: web::Path, + req: HttpRequest, + opt: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -56,12 +60,14 @@ macro_rules! make_setting_route { is_deletion: true, allow_index_creation, }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) .await?? .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete settings"); Ok(HttpResponse::Accepted().json(task)) } @@ -73,11 +79,13 @@ macro_rules! make_setting_route { index_uid: actix_web::web::Path, body: deserr::actix_web::AwebJson, $err_ty>, req: HttpRequest, + opt: web::Data, $analytics_var: web::Data, ) -> std::result::Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); + debug!(parameters = ?body, "Update settings"); #[allow(clippy::redundant_closure_call)] $analytics(&body, &req); @@ -104,12 +112,14 @@ macro_rules! make_setting_route { is_deletion: false, allow_index_creation, }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)) + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) .await?? .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Update settings"); Ok(HttpResponse::Accepted().json(task)) } @@ -126,7 +136,7 @@ macro_rules! make_setting_route { let rtxn = index.read_txn()?; let settings = settings(&index, &rtxn)?; - debug!("returns: {:?}", settings); + debug!(returns = ?settings, "Update settings"); let mut json = serde_json::json!(&settings); let val = json[$camelcase_attr].take(); @@ -651,11 +661,13 @@ pub async fn update_all( index_uid: web::Path, body: AwebJson, DeserrJsonError>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let new_settings = body.into_inner(); + debug!(parameters = ?new_settings, "Update all settings"); let new_settings = validate_settings(new_settings, &index_scheduler)?; analytics.publish( @@ -765,10 +777,14 @@ pub async fn update_all( is_deletion: false, allow_index_creation, }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Update all settings"); Ok(HttpResponse::Accepted().json(task)) } @@ -781,13 +797,15 @@ pub async fn get_all( let index = index_scheduler.index(&index_uid)?; let rtxn = index.read_txn()?; let new_settings = settings(&index, &rtxn)?; - debug!("returns: {:?}", new_settings); + debug!(returns = ?new_settings, "Get all settings"); Ok(HttpResponse::Ok().json(new_settings)) } pub async fn delete_all( index_scheduler: GuardedData, Data>, index_uid: web::Path, + req: HttpRequest, + opt: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -801,10 +819,14 @@ pub async fn delete_all( is_deletion: true, allow_index_creation, }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Delete all settings"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/logs.rs b/meilisearch/src/routes/logs.rs new file mode 100644 index 000000000..d95f80bb8 --- /dev/null +++ b/meilisearch/src/routes/logs.rs @@ -0,0 +1,281 @@ +use std::convert::Infallible; +use std::io::Write; +use std::ops::ControlFlow; +use std::pin::Pin; +use std::str::FromStr; +use std::sync::Arc; + +use actix_web::web::{Bytes, Data}; +use actix_web::{web, HttpResponse}; +use deserr::actix_web::AwebJson; +use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; +use futures_util::Stream; +use index_scheduler::IndexScheduler; +use meilisearch_types::deserr::DeserrJsonError; +use meilisearch_types::error::deserr_codes::*; +use meilisearch_types::error::{Code, ResponseError}; +use tokio::sync::mpsc; +use tracing_subscriber::filter::Targets; +use tracing_subscriber::Layer; + +use crate::error::MeilisearchHttpError; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::extractors::sequential_extractor::SeqHandler; +use crate::LogRouteHandle; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service( + web::resource("stream") + .route(web::post().to(SeqHandler(get_logs))) + .route(web::delete().to(SeqHandler(cancel_logs))), + ); +} + +#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)] +#[deserr(rename_all = camelCase)] +pub enum LogMode { + #[default] + Human, + Profile, +} + +/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it. +#[derive(Clone, Debug)] +struct MyTargets(Targets); + +/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it. +#[derive(Debug, thiserror::Error)] +enum MyParseError { + #[error(transparent)] + ParseError(#[from] tracing_subscriber::filter::ParseError), + #[error( + "Empty string is not a valid target. If you want to get no logs use `OFF`. 
Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`" + )] + Example, +} + +impl FromStr for MyTargets { + type Err = MyParseError; + + fn from_str(s: &str) -> Result { + if s.is_empty() { + Err(MyParseError::Example) + } else { + Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?)) + } + } +} + +impl MergeWithError for DeserrJsonError { + fn merge( + _self_: Option, + other: MyParseError, + merge_location: ValuePointerRef, + ) -> ControlFlow { + Self::error::( + None, + ErrorKind::Unexpected { msg: other.to_string() }, + merge_location, + ) + } +} + +#[derive(Debug, Deserr)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError)] +pub struct GetLogs { + #[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError)] + target: MyTargets, + + #[deserr(default, error = DeserrJsonError)] + mode: LogMode, + + #[deserr(default = false, error = DeserrJsonError)] + profile_memory: bool, +} + +fn validate_get_logs( + logs: GetLogs, + location: ValuePointerRef, +) -> Result { + if logs.profile_memory && logs.mode != LogMode::Profile { + Err(deserr::take_cf_content(E::error::( + None, + ErrorKind::Unexpected { + msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode), + }, + location, + ))) + } else { + Ok(logs) + } +} + +struct LogWriter { + sender: mpsc::UnboundedSender>, +} + +impl Write for LogWriter { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?; + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +struct HandleGuard { + /// We need to keep an handle on the logs to make it available again when the streamer is dropped + logs: Arc, +} + +impl Drop for HandleGuard { + fn drop(&mut self) { + if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) { + tracing::error!("Could not free the logs route: {e}"); + } + } +} + +fn byte_stream( + receiver: mpsc::UnboundedReceiver>, + guard: HandleGuard, +) -> impl futures_util::Stream> { + futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move { + let vec = receiver.recv().await; + + vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard))) + }) +} + +type PinnedByteStream = Pin>>>; + +fn make_layer< + S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>, +>( + opt: &GetLogs, + logs: Data, +) -> (Box + Send + Sync>, PinnedByteStream) { + let guard = HandleGuard { logs: logs.into_inner() }; + match opt.mode { + LogMode::Human => { + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); + + let fmt_layer = tracing_subscriber::fmt::layer() + .with_writer(move || LogWriter { sender: sender.clone() }) + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE); + + let stream = byte_stream(receiver, guard); + (Box::new(fmt_layer) as Box + Send + Sync>, Box::pin(stream)) + } + LogMode::Profile => { + let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory); + + let stream = entry_stream(trace, guard); + + (Box::new(layer) as Box + Send + Sync>, Box::pin(stream)) + } + } +} + +fn entry_stream( + trace: tracing_trace::Trace, + guard: HandleGuard, +) -> impl Stream> { + let receiver = trace.into_receiver(); + let entry_buf = Vec::new(); + + futures_util::stream::unfold( + (receiver, 
entry_buf, guard), + move |(mut receiver, mut entry_buf, guard)| async move { + let mut bytes = Vec::new(); + + while bytes.len() < 8192 { + entry_buf.clear(); + + let Ok(count) = tokio::time::timeout( + std::time::Duration::from_secs(1), + receiver.recv_many(&mut entry_buf, 100), + ) + .await + else { + break; + }; + + if count == 0 { + if !bytes.is_empty() { + break; + } + + // channel closed, exit + return None; + } + + for entry in &entry_buf { + if let Err(error) = serde_json::to_writer(&mut bytes, entry) { + tracing::error!( + error = &error as &dyn std::error::Error, + "deserializing entry" + ); + return Some(( + Err(ResponseError::from_msg( + format!("error deserializing entry: {error}"), + Code::Internal, + )), + (receiver, entry_buf, guard), + )); + } + } + } + + Some((Ok(bytes.into()), (receiver, entry_buf, guard))) + }, + ) +} + +pub async fn get_logs( + index_scheduler: GuardedData, Data>, + logs: Data, + body: AwebJson, +) -> Result { + index_scheduler.features().check_logs_route()?; + + let opt = body.into_inner(); + let mut stream = None; + + logs.modify(|layer| match layer.inner_mut() { + None => { + // there is no one getting logs + *layer.filter_mut() = opt.target.0.clone(); + let (new_layer, new_stream) = make_layer(&opt, logs.clone()); + + *layer.inner_mut() = Some(new_layer); + stream = Some(new_stream); + } + Some(_) => { + // there is already someone getting logs + } + }) + .unwrap(); + + if let Some(stream) = stream { + Ok(HttpResponse::Ok().streaming(stream)) + } else { + Err(MeilisearchHttpError::AlreadyUsedLogRoute.into()) + } +} + +pub async fn cancel_logs( + index_scheduler: GuardedData, Data>, + logs: Data, +) -> Result { + index_scheduler.features().check_logs_route()?; + + if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) { + tracing::error!("Could not free the logs route: {e}"); + } + + Ok(HttpResponse::NoContent().finish()) +} diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 0e5623b09..249103e12 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -3,18 +3,19 @@ use std::collections::BTreeMap; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_auth::AuthController; -use meilisearch_types::error::ResponseError; +use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; use serde::{Deserialize, Serialize}; use serde_json::json; use time::OffsetDateTime; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; +use crate::Opt; const PAGINATION_DEFAULT_LIMIT: usize = 20; @@ -22,6 +23,7 @@ mod api_key; mod dump; pub mod features; pub mod indexes; +mod logs; mod metrics; mod multi_search; mod snapshot; @@ -31,6 +33,7 @@ pub mod tasks; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::scope("/tasks").configure(tasks::configure)) .service(web::resource("/health").route(web::get().to(get_health))) + .service(web::scope("/logs").configure(logs::configure)) .service(web::scope("/keys").configure(api_key::configure)) .service(web::scope("/dumps").configure(dump::configure)) .service(web::scope("/snapshots").configure(snapshot::configure)) @@ -43,6 +46,56 @@ pub fn configure(cfg: &mut web::ServiceConfig) { 
.service(web::scope("/experimental-features").configure(features::configure)); } +pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result, ResponseError> { + if !opt.experimental_replication_parameters { + return Ok(None); + } + let task_id = req + .headers() + .get("TaskId") + .map(|header| { + header.to_str().map_err(|e| { + ResponseError::from_msg( + format!("TaskId is not a valid utf-8 string: {e}"), + Code::BadRequest, + ) + }) + }) + .transpose()? + .map(|s| { + s.parse::().map_err(|e| { + ResponseError::from_msg( + format!( + "Could not parse the TaskId as a {}: {e}", + std::any::type_name::(), + ), + Code::BadRequest, + ) + }) + }) + .transpose()?; + Ok(task_id) +} + +pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result { + if !opt.experimental_replication_parameters { + return Ok(false); + } + Ok(req + .headers() + .get("DryRun") + .map(|header| { + header.to_str().map_err(|e| { + ResponseError::from_msg( + format!("DryRun is not a valid utf-8 string: {e}"), + Code::BadRequest, + ) + }) + }) + .transpose()? + .map_or(false, |s| s.to_lowercase() == "true")) +} + #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] pub struct SummarizedTaskView { @@ -250,7 +303,7 @@ async fn get_stats( let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?; - debug!("returns: {:?}", stats); + debug!(returns = ?stats, "Get stats"); Ok(HttpResponse::Ok().json(stats)) } diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index 8e81688e6..86aa58e70 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -3,11 +3,11 @@ use actix_web::web::{self, Data}; use actix_web::{HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; use serde::Serialize; +use tracing::debug; use crate::analytics::{Analytics, MultiSearchAggregator}; use crate::extractors::authentication::policies::ActionPolicy; @@ -52,7 +52,7 @@ pub async fn multi_search_with_post( for (query_index, (index_uid, mut query)) in queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate() { - debug!("multi-search #{query_index}: called with params: {:?}", query); + debug!(on_index = query_index, parameters = ?query, "Multi-search"); // Check index from API key if !index_scheduler.filters().is_index_authorized(&index_uid) { @@ -107,7 +107,7 @@ pub async fn multi_search_with_post( err })?; - debug!("returns: {:?}", search_results); + debug!(returns = ?search_results, "Multi-search"); Ok(HttpResponse::Ok().json(SearchResults { results: search_results })) } diff --git a/meilisearch/src/routes/snapshot.rs b/meilisearch/src/routes/snapshot.rs index 7fa22658a..84673729f 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/meilisearch/src/routes/snapshot.rs @@ -1,16 +1,17 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; -use log::debug; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; use serde_json::json; +use tracing::debug; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::SummarizedTaskView; +use crate::routes::{get_task_id, is_dry_run, 
SummarizedTaskView}; +use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot)))); @@ -19,14 +20,19 @@ pub fn configure(cfg: &mut web::ServiceConfig) { pub async fn create_snapshot( index_scheduler: GuardedData, Data>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req)); let task = KindWithContent::SnapshotCreation; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); - debug!("returns: {:?}", task); + debug!(returns = ?task, "Create snapshot"); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/swap_indexes.rs b/meilisearch/src/routes/swap_indexes.rs index 79e619705..51a7b0707 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/meilisearch/src/routes/swap_indexes.rs @@ -10,12 +10,13 @@ use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; use serde_json::json; -use super::SummarizedTaskView; +use super::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; +use crate::Opt; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes)))); @@ -32,6 +33,7 @@ pub async fn swap_indexes( index_scheduler: GuardedData, Data>, params: AwebJson, DeserrJsonError>, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -60,7 +62,11 @@ pub async fn swap_indexes( } let task = KindWithContent::IndexSwap { swaps }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 03b63001d..f35d97fe6 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -18,11 +18,12 @@ use time::macros::format_description; use time::{Date, Duration, OffsetDateTime, Time}; use tokio::task; -use super::SummarizedTaskView; +use super::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::analytics::Analytics; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::Opt; const DEFAULT_LIMIT: u32 = 20; @@ -161,6 +162,7 @@ async fn cancel_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -197,7 +199,11 @@ async fn cancel_tasks( let task_cancelation = KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; - let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; + let task = + task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid, dry_run)) + .await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) @@ -207,6 +213,7 @@ async fn delete_tasks( index_scheduler: GuardedData, Data>, params: AwebQueryParameter, req: HttpRequest, + opt: web::Data, analytics: web::Data, ) -> Result { let params = params.into_inner(); @@ -242,7 +249,10 @@ async fn delete_tasks( let task_deletion = KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; - let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; + let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid, dry_run)) + .await??; let task: SummarizedTaskView = task.into(); Ok(HttpResponse::Ok().json(task)) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 551f89216..27de36c6d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -441,10 +441,6 @@ fn prepare_search<'t>( ScoringStrategy::Skip }); - if query.show_ranking_score_details { - features.check_score_details()?; - } - if let Some(HybridQuery { embedder: Some(embedder), .. 
}) = &query.hybrid { search.embedder_name(embedder); } diff --git a/meilisearch/tests/auth/authorization.rs b/meilisearch/tests/auth/authorization.rs index af028060d..d26bb26b8 100644 --- a/meilisearch/tests/auth/authorization.rs +++ b/meilisearch/tests/auth/authorization.rs @@ -59,6 +59,8 @@ pub static AUTHORIZATIONS: Lazy hashset!{"snapshots.create", "snapshots.*", "*"}, ("GET", "/version") => hashset!{"version", "*"}, ("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"}, + ("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"}, + ("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"}, ("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"}, ("GET", "/keys/mykey/") => hashset!{"keys.get", "*"}, ("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"}, diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 4992eeb13..16fc10e98 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -100,16 +100,11 @@ impl Index<'_> { pub async fn raw_add_documents( &self, payload: &str, - content_type: Option<&str>, + headers: Vec<(&str, &str)>, query_parameter: &str, ) -> (Value, StatusCode) { let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter); - - if let Some(content_type) = content_type { - self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await - } else { - self.service.post_str(url, payload, Vec::new()).await - } + self.service.post_str(url, payload, headers).await } pub async fn update_documents( diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index 27feb187f..134124cc8 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -13,6 +13,8 @@ use meilisearch::{analytics, create_app, setup_meilisearch}; use once_cell::sync::Lazy; use tempfile::TempDir; use tokio::time::sleep; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::Layer; use super::index::Index; use super::service::Service; @@ -81,10 +83,16 @@ impl Server { Response = ServiceResponse, Error = actix_web::Error, > { + let (_route_layer, route_layer_handle) = + tracing_subscriber::reload::Layer::new(None.with_filter( + tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), + )); + actix_web::test::init_service(create_app( self.service.index_scheduler.clone().into(), self.service.auth.clone().into(), self.service.options.clone(), + route_layer_handle, analytics::MockAnalytics::new(&self.service.options), true, )) diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index 078ddb6e5..4c23a18d8 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -7,6 +7,8 @@ use actix_web::test::TestRequest; use index_scheduler::IndexScheduler; use meilisearch::{analytics, create_app, Opt}; use meilisearch_auth::AuthController; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::Layer; use crate::common::encoder::Encoder; use crate::common::Value; @@ -105,10 +107,16 @@ impl Service { } pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { + let (_route_layer, route_layer_handle) = + tracing_subscriber::reload::Layer::new(None.with_filter( + tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), + )); + let app = test::init_service(create_app( self.index_scheduler.clone().into(), self.auth.clone().into(), self.options.clone(), + 
route_layer_handle, analytics::MockAnalytics::new(&self.options), true, )) diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 9733f7741..e6af85229 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -1,10 +1,11 @@ use actix_web::test; use meili_snap::{json_string, snapshot}; +use meilisearch::Opt; use time::format_description::well_known::Rfc3339; use time::OffsetDateTime; use crate::common::encoder::Encoder; -use crate::common::{GetAllDocumentsOptions, Server, Value}; +use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value}; use crate::json; /// This is the basic usage of our API and every other tests uses the content-type application/json @@ -2157,3 +2158,49 @@ async fn batch_several_documents_addition() { assert_eq!(code, 200, "failed with `{}`", response); assert_eq!(response["results"].as_array().unwrap().len(), 120); } + +#[actix_rt::test] +async fn dry_register_file() { + let temp = tempfile::tempdir().unwrap(); + + let options = + Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) }; + let server = Server::new_with_options(options).await.unwrap(); + let index = server.index("tamo"); + + let documents = r#" + { + "id": "12", + "doggo": "kefir" + } + "#; + + let (response, code) = index + .raw_add_documents( + documents, + vec![("Content-Type", "application/json"), ("DryRun", "true")], + "", + ) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "tamo", + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "[date]" + } + "###); + snapshot!(code, @"202 Accepted"); + + let (response, code) = index.get_task(response.uid()).await; + snapshot!(response, @r###" + { + "message": "Task `0` not found.", + "code": "task_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#task_not_found" + } + "###); + snapshot!(code, @"404 Not Found"); +} diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index bd06aabce..cd2d89813 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -209,7 +209,8 @@ async fn replace_documents_missing_payload() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.raw_add_documents("", Some("application/json"), "").await; + let (response, code) = + index.raw_add_documents("", vec![("Content-Type", "application/json")], "").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -220,7 +221,8 @@ async fn replace_documents_missing_payload() { } "###); - let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await; + let (response, code) = + index.raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -231,7 +233,8 @@ async fn replace_documents_missing_payload() { } "###); - let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await; + let (response, code) = + index.raw_add_documents("", vec![("Content-Type", "text/csv")], "").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -287,7 +290,7 @@ async fn replace_documents_missing_content_type() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.raw_add_documents("", None, 
"").await; + let (response, code) = index.raw_add_documents("", Vec::new(), "").await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -299,7 +302,7 @@ async fn replace_documents_missing_content_type() { "###); // even with a csv delimiter specified this error is triggered first - let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await; + let (response, code) = index.raw_add_documents("", Vec::new(), "?csvDelimiter=;").await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -345,7 +348,7 @@ async fn replace_documents_bad_content_type() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.raw_add_documents("", Some("doggo"), "").await; + let (response, code) = index.raw_add_documents("", vec![("Content-Type", "doggo")], "").await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -379,8 +382,9 @@ async fn replace_documents_bad_csv_delimiter() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = - index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter") + .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -391,8 +395,9 @@ async fn replace_documents_bad_csv_delimiter() { } "###); - let (response, code) = - index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=doggo") + .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -404,7 +409,11 @@ async fn replace_documents_bad_csv_delimiter() { "###); let (response, code) = index - .raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰"))) + .raw_add_documents( + "", + vec![("Content-Type", "application/json")], + &format!("?csvDelimiter={}", encode("🍰")), + ) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" @@ -469,8 +478,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = - index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=a") + .await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { @@ -481,8 +491,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() { } "###); - let (response, code) = - index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await; + let (response, code) = index + .raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "?csvDelimiter=a") + .await; snapshot!(code, @"415 Unsupported Media Type"); snapshot!(json_string!(response), @r###" { diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index fd34268a5..e8061ae4a 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -1845,9 +1845,9 @@ async fn import_dump_v6_containing_experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - 
"scoreDetails": false, "vectorStore": false, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs index abb006ac8..3a9812f30 100644 --- a/meilisearch/tests/features/mod.rs +++ b/meilisearch/tests/features/mod.rs @@ -18,9 +18,9 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": false, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -30,9 +30,9 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -42,9 +42,9 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -55,9 +55,9 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -68,9 +68,9 @@ async fn experimental_features() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -88,9 +88,9 @@ async fn experimental_feature_metrics() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": false, "metrics": true, + "logsRoute": false, "exportPuffinReports": false } "###); @@ -146,7 +146,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`", + "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" diff --git a/meilisearch/tests/index/create_index.rs b/meilisearch/tests/index/create_index.rs index 7ce56d440..b309b83c6 100644 --- a/meilisearch/tests/index/create_index.rs +++ b/meilisearch/tests/index/create_index.rs @@ -2,9 +2,10 @@ use actix_web::http::header::ContentType; use actix_web::test; use http::header::ACCEPT_ENCODING; use meili_snap::{json_string, snapshot}; +use meilisearch::Opt; use crate::common::encoder::Encoder; -use crate::common::{Server, Value}; +use crate::common::{default_settings, Server, Value}; use crate::json; #[actix_rt::test] @@ -199,3 +200,79 @@ async fn error_create_with_invalid_index_uid() { } "###); } + +#[actix_rt::test] +async fn send_task_id() { + let temp = tempfile::tempdir().unwrap(); + + let options = + Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) }; + let server = Server::new_with_options(options).await.unwrap(); + + let app = server.init_web_app().await; + let index = server.index("catto"); + let (response, code) = 
index.create(None).await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 0, + "indexUid": "catto", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "[date]" + } + "###); + + let body = serde_json::to_string(&json!({ + "uid": "doggo", + "primaryKey": None::<&str>, + })) + .unwrap(); + let req = test::TestRequest::post() + .uri("/indexes") + .insert_header(("TaskId", "25")) + .insert_header(ContentType::json()) + .set_payload(body) + .to_request(); + + let res = test::call_service(&app, req).await; + snapshot!(res.status(), @"202 Accepted"); + + let bytes = test::read_body(res).await; + let response = serde_json::from_slice::(&bytes).expect("Expecting valid json"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 25, + "indexUid": "doggo", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "[date]" + } + "###); + + let body = serde_json::to_string(&json!({ + "uid": "girafo", + "primaryKey": None::<&str>, + })) + .unwrap(); + let req = test::TestRequest::post() + .uri("/indexes") + .insert_header(("TaskId", "12")) + .insert_header(ContentType::json()) + .set_payload(body) + .to_request(); + + let res = test::call_service(&app, req).await; + snapshot!(res.status(), @"400 Bad Request"); + + let bytes = test::read_body(res).await; + let response = serde_json::from_slice::(&bytes).expect("Expecting valid json"); + snapshot!(json_string!(response), @r###" + { + "message": "Received bad task id: 12 should be >= to 26.", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} diff --git a/meilisearch/tests/integration.rs b/meilisearch/tests/integration.rs index b6992791a..943af802a 100644 --- a/meilisearch/tests/integration.rs +++ b/meilisearch/tests/integration.rs @@ -5,6 +5,7 @@ mod documents; mod dumps; mod features; mod index; +mod logs; mod search; mod settings; mod snapshot; diff --git a/meilisearch/tests/logs/error.rs b/meilisearch/tests/logs/error.rs new file mode 100644 index 000000000..4f4d741e3 --- /dev/null +++ b/meilisearch/tests/logs/error.rs @@ -0,0 +1,182 @@ +use meili_snap::*; + +use crate::common::Server; +use crate::json; + +#[actix_rt::test] +async fn logs_stream_bad_target() { + let server = Server::new().await; + + // Wrong type + let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong type + let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.target`: expected a string, but found an array: `[]`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Our help message + let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. 
Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // An error from the target parser + let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} + +#[actix_rt::test] +async fn logs_stream_bad_mode() { + let server = Server::new().await; + + // Wrong type + let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong type + let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong value + let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Unknown value `tamo` at `.mode`: expected one of `human`, `profile`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} + +#[actix_rt::test] +async fn logs_stream_bad_profile_memory() { + let server = Server::new().await; + + // Wrong type + let (response, code) = + server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Wrong type + let (response, code) = + server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // Used with default parameters + let (response, code) = + server.service.post("/logs/stream", json!({ "profileMemory": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.", + "code": "invalid_settings_typo_tolerance", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance" + } + "###); + + // Used with an unsupported mode + let (response, code) 
= + server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Unknown value `fmt` at `.mode`: expected one of `human`, `profile`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); +} + +#[actix_rt::test] +async fn logs_stream_without_enabling_the_route() { + let server = Server::new().await; + + let (response, code) = server.service.post("/logs/stream", json!({})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + + let (response, code) = server.service.delete("/logs/stream").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); +} diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs new file mode 100644 index 000000000..0002fe33c --- /dev/null +++ b/meilisearch/tests/logs/mod.rs @@ -0,0 +1,92 @@ +mod error; + +use std::rc::Rc; +use std::str::FromStr; + +use actix_web::http::header::ContentType; +use meili_snap::snapshot; +use meilisearch::{analytics, create_app, Opt}; +use tracing::level_filters::LevelFilter; +use tracing_subscriber::layer::SubscriberExt; +use tracing_subscriber::Layer; + +use crate::common::{default_settings, Server}; +use crate::json; + +#[actix_web::test] +async fn basic_test_log_stream_route() { + let db_path = tempfile::tempdir().unwrap(); + let server = Server::new_with_options(Opt { + experimental_enable_logs_route: true, + ..default_settings(db_path.path()) + }) + .await + .unwrap(); + + let (route_layer, route_layer_handle) = + tracing_subscriber::reload::Layer::new(None.with_filter( + tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), + )); + + let subscriber = tracing_subscriber::registry().with(route_layer).with( + tracing_subscriber::fmt::layer() + .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) + .with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()), + ); + + let app = actix_web::test::init_service(create_app( + server.service.index_scheduler.clone().into(), + server.service.auth.clone().into(), + server.service.options.clone(), + route_layer_handle, + analytics::MockAnalytics::new(&server.service.options), + true, + )) + .await; + + // set the subscriber as the default for the application + tracing::subscriber::set_global_default(subscriber).unwrap(); + + let app = Rc::new(app); + + // First, we start listening on the `/logs/stream` route + let handle_app = app.clone(); + let handle = tokio::task::spawn_local(async move { + let req = actix_web::test::TestRequest::post() + .uri("/logs/stream") + .insert_header(ContentType::json()) + .set_payload( + serde_json::to_vec(&json!({ + "mode": "human", + "target": "info", + })) + .unwrap(), + ); + let req = req.to_request(); + let ret = 
actix_web::test::call_service(&*handle_app, req).await; + actix_web::test::read_body(ret).await + }); + + // We're going to create an index to get at least one info log saying we processed a batch of task + let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await; + snapshot!(ret, @r###" + { + "taskUid": 0, + "indexUid": "tamo", + "status": "enqueued", + "type": "indexCreation", + "enqueuedAt": "[date]" + } + "###); + server.wait_task(ret.uid()).await; + + let req = actix_web::test::TestRequest::delete().uri("/logs/stream"); + let req = req.to_request(); + let ret = actix_web::test::call_service(&*app, req).await; + let code = ret.status(); + snapshot!(code, @"204 No Content"); + + let logs = handle.await.unwrap(); + let logs = String::from_utf8(logs.to_vec()).unwrap(); + assert!(logs.contains("INFO"), "{logs}"); +} diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 6ea9920f6..85bc96d86 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -13,9 +13,9 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "scoreDetails": false, "vectorStore": true, "metrics": false, + "logsRoute": false, "exportPuffinReports": false } "###); diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 9b7b01029..90098c5b6 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -766,38 +766,14 @@ async fn faceting_max_values_per_facet() { } #[actix_rt::test] -async fn experimental_feature_score_details() { +async fn test_score_details() { let server = Server::new().await; let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(json!(documents), None).await; - index.wait_task(0).await; - - index - .search( - json!({ - "q": "train dragon", - "showRankingScoreDetails": true, - }), - |response, code| { - meili_snap::snapshot!(code, @"400 Bad Request"); - meili_snap::snapshot!(meili_snap::json_string!(response), @r###" - { - "message": "Computing score details requires enabling the `score details` experimental feature. 
See https://github.com/meilisearch/product/discussions/674", - "code": "feature_not_enabled", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#feature_not_enabled" - } - "###); - }, - ) - .await; - - let (response, code) = server.set_features(json!({"scoreDetails": true})).await; - meili_snap::snapshot!(code, @"200 OK"); - meili_snap::snapshot!(response["scoreDetails"], @"true"); + let res = index.add_documents(json!(documents), None).await; + index.wait_task(res.0.uid()).await; index .search( diff --git a/meilisearch/tests/tasks/webhook.rs b/meilisearch/tests/tasks/webhook.rs index 6979ff294..a18a93edb 100644 --- a/meilisearch/tests/tasks/webhook.rs +++ b/meilisearch/tests/tasks/webhook.rs @@ -44,10 +44,6 @@ struct WebhookHandle { } async fn create_webhook_server() -> WebhookHandle { - let mut log_builder = env_logger::Builder::new(); - log_builder.parse_filters("info"); - log_builder.init(); - let (sender, receiver) = mpsc::unbounded_channel(); let sender = Arc::new(sender); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index dc2b992e0..4bc05d2cc 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -71,7 +71,6 @@ itertools = "0.11.0" puffin = "0.16.0" # logging -log = "0.4.20" logging_timer = "1.1.0" csv = "1.3.0" candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" } @@ -91,6 +90,7 @@ tiktoken-rs = "0.5.8" liquid = "0.26.4" arroy = "0.2.0" rand = "0.8.5" +tracing = "0.1.40" [dev-dependencies] mimalloc = { version = "0.1.39", default-features = false } @@ -102,15 +102,7 @@ meili-snap = { path = "../meili-snap" } rand = { version = "0.8.5", features = ["small_rng"] } [features] -all-tokenizations = [ - "charabia/chinese", - "charabia/hebrew", - "charabia/japanese", - "charabia/thai", - "charabia/korean", - "charabia/greek", - "charabia/khmer", -] +all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml diff --git a/milli/src/documents/reader.rs b/milli/src/documents/reader.rs index a8a4c662d..d5eda69d4 100644 --- a/milli/src/documents/reader.rs +++ b/milli/src/documents/reader.rs @@ -25,6 +25,7 @@ impl DocumentsBatchReader { /// /// It first retrieves the index, then moves to the first document. Use the `into_cursor` /// method to iterator over the documents, from the first to the last. + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn from_reader(reader: R) -> Result { let reader = grenad::Reader::new(reader)?; let mut cursor = reader.into_cursor()?; diff --git a/milli/src/error.rs b/milli/src/error.rs index 5a4fbc7f5..1147085dd 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -227,6 +227,22 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco source_: crate::vector::settings::EmbedderSource, embedder_name: String, }, + #[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its native dimensions of {expected_dimensions}. Found {dimensions}")] + InvalidOpenAiModelDimensions { + embedder_name: String, + model: &'static str, + dimensions: usize, + expected_dimensions: usize, + }, + #[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its dimensions to a value higher than {max_dimensions}. 
Found {dimensions}")] + InvalidOpenAiModelDimensionsMax { + embedder_name: String, + model: &'static str, + dimensions: usize, + max_dimensions: usize, + }, + #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] + InvalidSettingsDimensions { embedder_name: String }, } impl From for Error { diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 7bac5ea0c..e411bd032 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -6,9 +6,9 @@ use charabia::Normalize; use fst::automaton::{Automaton, Str}; use fst::{IntoStreamer, Streamer}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; -use log::error; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; +use tracing::error; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index b439b87ec..e7bafaf70 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( continue; } + let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id()); + let entered = span.enter(); + let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket( ctx, logger, @@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( back!(); continue; }; + drop(entered); ranking_rule_scores.push(next_bucket.score); diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index a6c7ff2b1..6715939dc 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -14,6 +14,12 @@ impl<'t, 'i> ClearDocuments<'t, 'i> { ClearDocuments { wtxn, index } } + #[tracing::instrument( + level = "trace", + skip(self), + target = "indexing::documents", + name = "clear_documents" + )] pub fn execute(self) -> Result { puffin::profile_function!(); diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index ad8a838c8..400507c97 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption}; use grenad::{CompressionType, SortAlgorithm}; use heed::types::{Bytes, DecodeIgnore, SerdeJson}; use heed::BytesEncode; -use log::debug; use time::OffsetDateTime; +use tracing::debug; use self::incremental::FacetsUpdateIncremental; use super::FacetsUpdateBulk; diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs index 03eb3f4de..162136912 100644 --- a/milli/src/update/index_documents/enrich.rs +++ b/milli/src/update/index_documents/enrich.rs @@ -22,6 +22,7 @@ use crate::{FieldId, Index, Result}; /// # Panics /// /// - if reader.is_empty(), this function may panic in some cases +#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn enrich_documents_batch( rtxn: &heed::RoTxn, index: &Index, @@ -77,7 +78,7 @@ pub fn enrich_documents_batch( }, [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), [(field_id, name)] => { - log::info!("Primary key was not specified in index. Inferred to '{name}'"); + tracing::info!("Primary key was not specified in index. 
Inferred to '{name}'"); PrimaryKey::Flat { name, field_id: *field_id } } multiple => { @@ -143,6 +144,8 @@ pub fn enrich_documents_batch( /// Retrieve the document id after validating it, returning a `UserError` /// if the id is invalid or can't be guessed. +#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document) +target = "indexing::documents")] fn fetch_or_generate_document_id( document: &obkv::KvReader, documents_batch_index: &DocumentsBatchIndex, diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index a6bbf939a..d568154b2 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -21,7 +21,7 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, R /// /// Returns the generated internal documents ids and a grenad reader /// with the list of extracted words from the given chunk of documents. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_docid_word_positions( obkv_documents: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs index f860aacba..33def5abd 100644 --- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs @@ -16,7 +16,7 @@ use crate::Result; /// /// Returns a grenad reader with the list of extracted facet numbers and /// documents ids from the given chunk of docid facet number positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_facet_number_docids( fid_docid_facet_number: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 2ade776c3..d14be7464 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -15,7 +15,7 @@ use crate::{FieldId, Result}; /// /// Returns a grenad reader with the list of extracted facet strings and /// documents ids from the given chunk of docid facet string positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_facet_string_docids( docid_fid_facet_string: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index b7de1e621..1f8af372d 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -39,7 +39,7 @@ pub struct ExtractedFacetValues { /// Returns the generated grenad reader containing the docid the fid and the orginal value as key /// and the normalized value as value extracted from the given chunk of documents. /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially. 
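
The comment above notes that `_geo` coordinates may have been sent as strings yet still need to parse as numbers, and the extractor below now reports a failed parse of an already-validated geofield through `tracing::warn!` instead of `log::warn!`. The following is a minimal, self-contained sketch of that kind of lenient parsing; `parse_coordinate` and the `main` harness are made up for illustration and are not the actual milli code.

```rust
use serde_json::Value;

/// Accept a coordinate sent either as a JSON number or as a stringified number.
fn parse_coordinate(value: &Value) -> Option<f64> {
    match value {
        Value::Number(number) => number.as_f64(),
        Value::String(original) => match original.parse::<f64>() {
            Ok(float) => Some(float),
            Err(_) => {
                // Reported through `tracing` now that `log` has been removed from milli.
                tracing::warn!(
                    "Internal error, could not parse a geofield that has been validated. Please open an issue."
                );
                None
            }
        },
        _ => None,
    }
}

fn main() {
    let lat = serde_json::json!("48.8566"); // string form
    let lng = serde_json::json!(2.3522); // number form
    println!("{:?} {:?}", parse_coordinate(&lat), parse_coordinate(&lng));
}
```
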
-#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_fid_docid_facet_values( obkv_documents: grenad::Reader, indexer: GrenadParameters, @@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues { if let Ok(float) = original.parse() { output_numbers.push(float); } else { - log::warn!( + tracing::warn!( "Internal error, could not parse a geofield that has been validated. Please open an issue." ) } diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index 182d0c5d8..305af3630 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -19,7 +19,7 @@ const MAX_COUNTED_WORDS: usize = 30; /// /// Returns a grenad reader with the list of extracted field id word counts /// and documents ids from the given chunk of docid word positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_fid_word_count_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index b3600e3bc..cfcc021c6 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -13,7 +13,7 @@ use crate::{FieldId, InternalError, Result}; /// Extracts the geographical coordinates contained in each document under the `_geo` field. /// /// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude) -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_geo_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 87181edc2..117f6cc8c 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -67,7 +67,7 @@ impl VectorStateDelta { /// Extracts the embedding vector contained in each document under the `_vectors` field. 
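
The extractors in these hunks trade `#[logging_timer::time]` for `#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]`. As a standalone sketch, here is how such a target can be selected with a `tracing_subscriber` `Targets` filter, the same filter syntax the new `/logs/stream` route parses (for example `indexing::extract=trace`); the function and the `main` binary are illustrative, not Meilisearch code.

```rust
use tracing::level_filters::LevelFilter;
use tracing_subscriber::filter::Targets;
use tracing_subscriber::fmt::format::FmtSpan;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::Layer;

#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn extract_something(documents: &[&str]) -> usize {
    // Events emitted inside the function are attached to the span created by the attribute.
    tracing::trace!(target: "indexing::extract", count = documents.len(), "extracting");
    documents.len()
}

fn main() {
    // Keep only the `indexing::extract` target at trace level; everything else stays off.
    let filter = Targets::new().with_target("indexing::extract", LevelFilter::TRACE);

    tracing_subscriber::registry()
        .with(
            tracing_subscriber::fmt::layer()
                .with_span_events(FmtSpan::ACTIVE)
                .with_filter(filter),
        )
        .init();

    extract_something(&["kefir", "tamo"]);
}
```
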
/// /// Returns the generated grenad reader containing the docid as key associated to the Vec -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_vector_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, @@ -186,12 +186,12 @@ pub fn extract_vector_points( prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default(); let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?; if old_prompt != new_prompt { - log::trace!( + tracing::trace!( "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" ); VectorStateDelta::NowGenerated(new_prompt) } else { - log::trace!("⏭️ Prompt unmodified, skipping"); + tracing::trace!("⏭️ Prompt unmodified, skipping"); VectorStateDelta::NoChange } } else { diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index 66092821f..f38701dac 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -23,7 +23,7 @@ use crate::{DocumentId, FieldId, Result}; /// /// The first returned reader is the one for normal word_docids, and the second one is for /// exact_word_docids -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_word_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, @@ -135,6 +135,7 @@ pub fn extract_word_docids( )) } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] fn words_into_sorter( document_id: DocumentId, fid: FieldId, diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index d364b3e13..82a94ce00 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -19,7 +19,7 @@ use crate::{DocumentId, Result}; /// /// Returns a grenad reader with the list of extracted word pairs proximities and /// documents ids from the given chunk of docid word positions. 
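
A few hunks above, `extract_vector_points` compares the old and new rendered prompts to decide whether an embedding must be regenerated, and that decision is now traced with `tracing::trace!`. Below is a reduced sketch of the decision; `VectorStateDelta` here is a local stand-in with only two variants, not the real milli enum.

```rust
/// Stand-in for milli's `VectorStateDelta`, reduced to the two variants used here.
#[derive(Debug)]
enum VectorStateDelta {
    NowGenerated(String),
    NoChange,
}

fn prompt_delta(old_prompt: &str, new_prompt: String) -> VectorStateDelta {
    if old_prompt != new_prompt {
        tracing::trace!("🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}");
        VectorStateDelta::NowGenerated(new_prompt)
    } else {
        tracing::trace!("⏭️ Prompt unmodified, skipping");
        VectorStateDelta::NoChange
    }
}

fn main() {
    println!("{:?}", prompt_delta("a photo of a cat", "a photo of a dog".to_string()));
    println!("{:?}", prompt_delta("unchanged", "unchanged".to_string()));
}
```
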
-#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_word_pair_proximity_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, @@ -59,6 +59,10 @@ pub fn extract_word_pair_proximity_docids( if current_document_id.map_or(false, |id| id != document_id) { puffin::profile_scope!("Document into sorter"); + // FIXME: span inside of a hot loop might degrade performance and create big reports + let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter"); + let _entered = span.enter(); + document_word_positions_into_sorter( current_document_id.unwrap(), &del_word_pair_proximity, @@ -138,6 +142,10 @@ pub fn extract_word_pair_proximity_docids( if let Some(document_id) = current_document_id { puffin::profile_scope!("Final document into sorter"); + // FIXME: span inside of a hot loop might degrade performance and create big reports + let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter"); + let _entered = span.enter(); + document_word_positions_into_sorter( document_id, &del_word_pair_proximity, @@ -147,6 +155,10 @@ pub fn extract_word_pair_proximity_docids( } { puffin::profile_scope!("sorter_into_reader"); + // FIXME: span inside of a hot loop might degrade performance and create big reports + let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader"); + let _entered = span.enter(); + let mut writer = create_writer( indexer.chunk_compression_type, indexer.chunk_compression_level, diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs index 89b77d140..4bc553d9a 100644 --- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs @@ -18,7 +18,7 @@ use crate::{bucketed_position, DocumentId, Result}; /// /// Returns a grenad reader with the list of extracted words at positions and /// documents ids from the given chunk of docid word positions. -#[logging_timer::time] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_word_position_docids( docid_word_positions: grenad::Reader, indexer: GrenadParameters, @@ -94,6 +94,7 @@ pub fn extract_word_position_docids( Ok(word_position_docids_reader) } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] fn words_position_into_sorter( document_id: DocumentId, key_buffer: &mut Vec, diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 43e9a36ec..44f54ff26 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -14,8 +14,8 @@ use std::fs::File; use std::io::BufReader; use crossbeam_channel::Sender; -use log::debug; use rayon::prelude::*; +use tracing::debug; use self::extract_docid_word_positions::extract_docid_word_positions; use self::extract_facet_number_docids::extract_facet_number_docids; @@ -41,6 +41,7 @@ use crate::{FieldId, FieldsIdsMap, Result}; /// Extract data for each databases from obkv documents in parallel. /// Send data in grenad file over provided Sender. 
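
The hunks above wrap the per-document sorter flushes in `trace_span!`/`enter` pairs, with FIXME notes that a span created inside a hot loop can cost time and inflate profile reports. The sketch below shows the guard pattern itself, where the span stays open until the `enter` guard is dropped; the names, target, and data are illustrative only.

```rust
fn flush_documents(batches: &[Vec<u32>]) {
    for (index, batch) in batches.iter().enumerate() {
        // Creating and entering a span on every loop turn is exactly what the FIXME
        // warns about: cheap for a handful of batches, costly for millions of documents.
        let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter", index);
        let _entered = span.enter();

        tracing::trace!(target: "indexing::details", len = batch.len(), "flushing batch");
        // The span is exited when `_entered` drops at the end of the iteration.
    }
}

fn main() {
    // Any subscriber works; a plain fmt subscriber at TRACE level prints the
    // events together with their span context.
    tracing_subscriber::fmt().with_max_level(tracing::Level::TRACE).init();
    flush_documents(&[vec![1, 2, 3], vec![4, 5]]);
}
```
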
#[allow(clippy::too_many_arguments)] +#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub(crate) fn data_from_obkv_documents( original_obkv_chunks: impl Iterator>>> + Send, flattened_obkv_chunks: impl Iterator>>> + Send, @@ -113,7 +114,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug!("merge {} database", "facet-id-exists-docids"); + debug!(database = "facet-id-exists-docids", "merge"); match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader))); @@ -129,7 +130,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug!("merge {} database", "facet-id-is-null-docids"); + debug!(database = "facet-id-is-null-docids", "merge"); match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader))); @@ -145,7 +146,7 @@ pub(crate) fn data_from_obkv_documents( { let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - debug!("merge {} database", "facet-id-is-empty-docids"); + debug!(database = "facet-id-is-empty-docids", "merge"); match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { Ok(reader) => { let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader))); @@ -257,13 +258,22 @@ fn spawn_extraction_task( M: MergeableReader + FromParallelIterator + Send + 'static, M::Output: Send, { + let current_span = tracing::Span::current(); + rayon::spawn(move || { - puffin::profile_scope!("extract_multiple_chunks", name); + let child_span = + tracing::trace_span!(target: "", parent: ¤t_span, "extract_multiple_chunks"); + let _entered = child_span.enter(); + puffin::profile_scope!("extract_multiple_chunksdexing::details, ", name); let chunks: Result = chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect(); + let current_span = tracing::Span::current(); + rayon::spawn(move || match chunks { Ok(chunks) => { - debug!("merge {} database", name); + let child_span = tracing::trace_span!(target: "", parent: ¤t_span, "merge_multiple_chunks"); + let _entered = child_span.enter(); + debug!(database = name, "merge"); puffin::profile_scope!("merge_multiple_chunks", name); let reader = chunks.merge(merge_fn, &indexer); let _ = lmdb_writer_sx.send(reader.map(serialize_fn)); diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index e1b27baa2..3e63fcf77 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -9,6 +9,10 @@ use super::{ClonableMmap, MergeFn}; use crate::update::index_documents::valid_lmdb_key; use crate::Result; +/// This is something reasonable given the fact +/// that there is one grenad sorter by thread. +const MAX_GRENAD_SORTER_USAGE: usize = 500 * 1024 * 1024; // 500 MiB + pub type CursorClonableMmap = io::Cursor; pub fn create_writer( @@ -24,6 +28,9 @@ pub fn create_writer( builder.build(BufWriter::new(file)) } +/// A helper function that creates a grenad sorter +/// with the given parameters. The max memory is +/// clamped to something reasonable. 
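
`MAX_GRENAD_SORTER_USAGE` above caps how much memory a single grenad sorter may hold before dumping to disk, and the code below clamps both the sorter's dump threshold and the per-thread budget to it. Here is a worked sketch of that arithmetic; the helper is a simplified stand-in that takes the thread count as a parameter rather than asking rayon, so it is not the real `GrenadParameters::max_memory_by_thread`.

```rust
/// Same value as the constant introduced above: 500 MiB per sorter.
const MAX_GRENAD_SORTER_USAGE: usize = 500 * 1024 * 1024;

/// Simplified stand-in: split the global budget across threads,
/// then clamp each share to the per-sorter cap.
fn max_memory_by_thread(max_memory: Option<usize>, threads: usize) -> Option<usize> {
    max_memory.map(|max_memory| (max_memory / threads).min(MAX_GRENAD_SORTER_USAGE))
}

fn main() {
    const MIB: usize = 1024 * 1024;

    // 2 GiB split over 2 threads would be 1 GiB each,
    // but every sorter is still clamped to 500 MiB.
    assert_eq!(max_memory_by_thread(Some(2048 * MIB), 2), Some(500 * MIB));

    // 1 GiB over 16 threads is 64 MiB each: below the cap, so it is kept as-is.
    assert_eq!(max_memory_by_thread(Some(1024 * MIB), 16), Some(64 * MIB));

    // No explicit limit: grenad keeps its own defaults.
    assert_eq!(max_memory_by_thread(None, 8), None);
}
```
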
pub fn create_sorter( sort_algorithm: grenad::SortAlgorithm, merge: MergeFn, @@ -41,7 +48,7 @@ pub fn create_sorter( builder.max_nb_chunks(nb_chunks); } if let Some(memory) = max_memory { - builder.dump_threshold(memory); + builder.dump_threshold(memory.min(MAX_GRENAD_SORTER_USAGE)); builder.allow_realloc(false); } builder.sort_algorithm(sort_algorithm); @@ -49,6 +56,7 @@ pub fn create_sorter( builder.build() } +#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")] pub fn sorter_into_reader( sorter: grenad::Sorter, indexer: GrenadParameters, @@ -187,10 +195,15 @@ impl Default for GrenadParameters { impl GrenadParameters { /// This function use the number of threads in the current threadpool to compute the value. + /// /// This should be called inside of a rayon thread pool, - /// Otherwise, it will take the global number of threads. + /// otherwise, it will take the global number of threads. + /// + /// The max memory cannot exceed a given reasonable value. pub fn max_memory_by_thread(&self) -> Option { - self.max_memory.map(|max_memory| max_memory / rayon::current_num_threads()) + self.max_memory.map(|max_memory| { + (max_memory / rayon::current_num_threads()).min(MAX_GRENAD_SORTER_USAGE) + }) } } @@ -240,6 +253,7 @@ pub fn grenad_obkv_into_chunks( /// Write provided sorter in database using serialize_value function. /// merge_values function is used if an entry already exist in the database. +#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")] pub fn write_sorter_into_database( sorter: Sorter, database: &heed::Database, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 83d2c08e1..36aa94964 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -13,11 +13,11 @@ use std::result::Result as StdResult; use crossbeam_channel::{Receiver, Sender}; use heed::types::Str; use heed::Database; -use log::debug; use rand::SeedableRng; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use slice_group_by::GroupBy; +use tracing::debug_span; use typed_chunk::{write_typed_chunk_into_index, TypedChunk}; use self::enrich::enrich_documents_batch; @@ -134,6 +134,7 @@ where /// return an error and not the `IndexDocuments` struct as it is invalid to use it afterward. /// /// Returns the number of documents added to the builder. + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn add_documents( mut self, reader: DocumentsBatchReader, @@ -179,6 +180,7 @@ where /// Remove a batch of documents from the current builder. /// /// Returns the number of documents deleted from the builder. + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn remove_documents( mut self, to_delete: Vec, @@ -214,6 +216,7 @@ where /// - No batching using the standards `remove_documents` and `add_documents` took place /// /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function. 
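Concretely, the clamp added in create_sorter and GrenadParameters::max_memory_by_thread above means each thread keeps its old max_memory / current_num_threads() share, further capped at 500 MiB, so a large memory budget on a machine with few threads no longer produces huge in-memory sorters. A toy version of the arithmetic (not the real GrenadParameters API):

    const MAX_GRENAD_SORTER_USAGE: usize = 500 * 1024 * 1024; // 500 MiB

    // e.g. a 16 GiB budget over 8 threads would naively allow 2 GiB per
    // sorter; the clamp brings each one back down to 500 MiB.
    fn max_memory_by_thread(max_memory: Option<usize>, threads: usize) -> Option<usize> {
        max_memory.map(|memory| (memory / threads).min(MAX_GRENAD_SORTER_USAGE))
    }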
+ #[tracing::instrument(level = "trace", skip_all, target = "indexing::details")] pub fn remove_documents_from_db_no_batch( mut self, to_delete: &RoaringBitmap, @@ -237,7 +240,12 @@ where Ok((self, deleted_documents)) } - #[logging_timer::time("IndexDocuments::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::documents", + name = "index_documents" + )] pub fn execute(mut self) -> Result { puffin::profile_function!(); @@ -273,7 +281,12 @@ where } /// Returns the total number of documents in the index after the update. - #[logging_timer::time("IndexDocuments::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "profile::indexing::details", + name = "index_documents_raw" + )] pub fn execute_raw(self, output: TransformOutput) -> Result where FP: Fn(UpdateIndexingStep) + Sync, @@ -374,8 +387,12 @@ where let cloned_embedder = self.embedders.clone(); + let current_span = tracing::Span::current(); + // Run extraction pipeline in parallel. pool.install(|| { + let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks"); + let _enter = child_span.enter(); puffin::profile_scope!("extract_and_send_grenad_chunks"); // split obkv file into several chunks let original_chunk_iter = @@ -489,10 +506,7 @@ where documents_seen: documents_seen_count as usize, total_documents: documents_count, }); - debug!( - "We have seen {} documents on {} total document so far", - documents_seen_count, documents_count - ); + debug_span!("Seen", documents = documents_seen_count, total = documents_count); } if is_merged_database { databases_seen += 1; @@ -543,7 +557,12 @@ where Ok(number_of_documents) } - #[logging_timer::time("IndexDocuments::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "index_documents_prefix_databases" + )] pub fn execute_prefix_databases( self, word_docids: Option>, @@ -598,6 +617,8 @@ where let del_prefix_fst_words; { + let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs"); + let _entered = span.enter(); puffin::profile_scope!("compute_prefix_diffs"); current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?; @@ -722,6 +743,12 @@ where /// Run the word prefix docids update operation. #[allow(clippy::too_many_arguments)] +#[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "index_documents_word_prefix_docids" +)] fn execute_word_prefix_docids( txn: &mut heed::RwTxn, reader: grenad::Reader>, diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index ab8e27edb..e5392092f 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -146,7 +146,7 @@ impl<'a, 'i> Transform<'a, 'i> { }) } - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn read_documents( &mut self, reader: EnrichedDocumentsBatchReader, @@ -359,7 +359,7 @@ impl<'a, 'i> Transform<'a, 'i> { /// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db, /// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids. /// - If the document to remove was not present in either the db or the transform we do nothing.
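Note the name parameter on the attributes above: without it, execute and execute_raw would both emit spans named after the method, which is ambiguous once several builders expose an execute. A schematic example of the renaming, using a placeholder type:

    struct SomeBuilder;

    impl SomeBuilder {
        // The span is reported as "index_documents" instead of "execute".
        #[tracing::instrument(
            level = "trace",
            skip_all,
            target = "indexing::documents",
            name = "index_documents"
        )]
        fn execute(self) {
            // ...
        }
    }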
- #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")] pub fn remove_documents( &mut self, mut to_remove: Vec, @@ -450,7 +450,7 @@ impl<'a, 'i> Transform<'a, 'i> { /// - No batching using the standards `remove_documents` and `add_documents` took place /// /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function. - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::details")] pub fn remove_documents_from_db_no_batch( &mut self, to_remove: &RoaringBitmap, @@ -541,6 +541,7 @@ impl<'a, 'i> Transform<'a, 'i> { // Flatten a document from the fields ids map contained in self and insert the new // created fields. Returns `None` if the document doesn't need to be flattened. + #[tracing::instrument(level = "trace", skip(self, obkv), target = "indexing::transform")] fn flatten_from_fields_ids_map(&mut self, obkv: KvReader) -> Result>> { if obkv .iter() @@ -661,7 +662,7 @@ impl<'a, 'i> Transform<'a, 'i> { /// Generate the `TransformOutput` based on the given sorter that can be generated from any /// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document /// id for the user side and the value must be an obkv where keys are valid fields ids. - #[logging_timer::time] + #[tracing::instrument(level = "trace", skip_all, target = "indexing::transform")] pub(crate) fn output_from_sorter( self, wtxn: &mut heed::RwTxn, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 58cb9bb02..af828fee6 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -115,6 +115,7 @@ impl TypedChunk { /// Write typed chunk in the corresponding LMDB database of the provided index. /// Return new documents seen. 
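Also worth noting: flatten_from_fields_ids_map above uses skip(self, obkv) rather than skip_all, so only the listed arguments are excluded and any remaining ones are recorded as span fields through their Debug output. A hedged sketch of the difference, with made-up argument names:

    // `document_id` ends up as a field on the span; `payload` is skipped
    // because it is large and has no useful `Debug` representation.
    #[tracing::instrument(level = "trace", skip(payload), target = "indexing::transform")]
    fn flatten(document_id: u32, payload: &[u8]) -> usize {
        payload.len()
    }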
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] pub(crate) fn write_typed_chunk_into_index( typed_chunk: TypedChunk, index: &Index, @@ -126,6 +127,8 @@ pub(crate) fn write_typed_chunk_into_index( let mut is_merged_database = false; match typed_chunk { TypedChunk::Documents(obkv_documents_iter) => { + let span = tracing::trace_span!(target: "indexing::write_db", "documents"); + let _entered = span.enter(); let mut operations: Vec = Default::default(); let mut docids = index.documents_ids(wtxn)?; @@ -172,6 +175,9 @@ pub(crate) fn write_typed_chunk_into_index( index.put_documents_ids(wtxn, &docids)?; } TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids"); + let _entered = span.enter(); append_entries_into_database( fid_word_count_docids_iter, &index.field_id_word_count_docids, @@ -187,6 +193,8 @@ pub(crate) fn write_typed_chunk_into_index( exact_word_docids_reader, word_fid_docids_reader, } => { + let span = tracing::trace_span!(target: "indexing::write_db", "word_docids"); + let _entered = span.enter(); let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?; append_entries_into_database( word_docids_iter.clone(), @@ -230,6 +238,8 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::WordPositionDocids(word_position_docids_iter) => { + let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids"); + let _entered = span.enter(); append_entries_into_database( word_position_docids_iter, &index.word_position_docids, @@ -241,16 +251,25 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids"); + let _entered = span.enter(); let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter); indexer.execute(wtxn)?; is_merged_database = true; } TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids"); + let _entered = span.enter(); let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter); indexer.execute(wtxn)?; is_merged_database = true; } TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids"); + let _entered = span.enter(); append_entries_into_database( facet_id_exists_docids, &index.facet_id_exists_docids, @@ -262,6 +281,9 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids"); + let _entered = span.enter(); append_entries_into_database( facet_id_is_null_docids, &index.facet_id_is_null_docids, @@ -273,6 +295,8 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => { + let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids"); + let _entered = span.enter(); append_entries_into_database( facet_id_is_empty_docids, &index.facet_id_is_empty_docids, @@ -284,6 +308,9 @@ pub(crate) fn write_typed_chunk_into_index( 
is_merged_database = true; } TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids"); + let _entered = span.enter(); append_entries_into_database( word_pair_proximity_docids_iter, &index.word_pair_proximity_docids, @@ -295,6 +322,9 @@ pub(crate) fn write_typed_chunk_into_index( is_merged_database = true; } TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers"); + let _entered = span.enter(); let index_fid_docid_facet_numbers = index.field_id_docid_facet_f64s.remap_types::(); let mut cursor = fid_docid_facet_number.into_cursor()?; @@ -315,6 +345,9 @@ pub(crate) fn write_typed_chunk_into_index( } } TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => { + let span = + tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings"); + let _entered = span.enter(); let index_fid_docid_facet_strings = index.field_id_docid_facet_strings.remap_types::(); let mut cursor = fid_docid_facet_string.into_cursor()?; @@ -335,6 +368,8 @@ pub(crate) fn write_typed_chunk_into_index( } } TypedChunk::GeoPoints(geo_points) => { + let span = tracing::trace_span!(target: "indexing::write_db", "geo_points"); + let _entered = span.enter(); let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default(); let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?; @@ -365,6 +400,8 @@ pub(crate) fn write_typed_chunk_into_index( expected_dimension, embedder_name, } => { + let span = tracing::trace_span!(target: "indexing::write_db", "vector_points"); + let _entered = span.enter(); let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; @@ -480,9 +517,11 @@ pub(crate) fn write_typed_chunk_into_index( } } - log::debug!("Finished vector chunk for {}", embedder_name); + tracing::debug!("Finished vector chunk for {}", embedder_name); } TypedChunk::ScriptLanguageDocids(sl_map) => { + let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids"); + let _entered = span.enter(); for (key, (deletion, addition)) in sl_map { let mut db_key_exists = false; let final_value = match index.script_language_docids.get(wtxn, &key)? { @@ -536,6 +575,7 @@ fn merge_word_docids_reader_into_fst( /// Write provided entries in database using serialize_value function. /// merge_values function is used if an entry already exist in the database. +#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] fn write_entries_into_database( data: grenad::Reader, database: &heed::Database, @@ -582,6 +622,7 @@ where /// merge_values function is used if an entry already exist in the database. /// All provided entries must be ordered. /// If the index is not empty, write_entries_into_database is called instead. 
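Because write_typed_chunk_into_index is one large match, a single function-level span would lump every database write together; the per-arm spans under the indexing::write_db target are what let a profile attribute time to each chunk type. Schematically (simplified types, not the real TypedChunk):

    enum TypedChunk {
        Documents(Vec<u8>),
        GeoPoints(Vec<u8>),
    }

    fn write_typed_chunk(chunk: TypedChunk) {
        match chunk {
            TypedChunk::Documents(_data) => {
                let span = tracing::trace_span!(target: "indexing::write_db", "documents");
                let _entered = span.enter();
                // ... write the documents database ...
            }
            TypedChunk::GeoPoints(_data) => {
                let span = tracing::trace_span!(target: "indexing::write_db", "geo_points");
                let _entered = span.enter();
                // ... write the rtree and the geo faceted ids ...
            }
        }
    }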
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] fn append_entries_into_database( data: grenad::Reader, database: &heed::Database, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index d770bcd74..3cad79467 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -372,6 +372,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.embedder_settings = Setting::Reset; } + #[tracing::instrument( + level = "trace", + skip(self, progress_callback, should_abort, old_fields_ids_map), + target = "indexing::documents" + )] fn reindex( &mut self, progress_callback: &FP, @@ -974,6 +979,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { crate::vector::settings::EmbeddingSettings::apply_default_source( &mut setting, ); + crate::vector::settings::EmbeddingSettings::apply_default_openai_model( + &mut setting, + ); let setting = validate_embedding_settings(setting, &name)?; changed = true; new_configs.insert(name, setting); @@ -1119,6 +1127,14 @@ pub fn validate_embedding_settings( let Setting::Set(settings) = settings else { return Ok(settings) }; let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } = settings; + + if let Some(0) = dimensions.set() { + return Err(crate::error::UserError::InvalidSettingsDimensions { + embedder_name: name.to_owned(), + } + .into()); + } + let Some(inferred_source) = source.set() else { return Ok(Setting::Set(EmbeddingSettings { source, @@ -1132,14 +1148,34 @@ pub fn validate_embedding_settings( match inferred_source { EmbedderSource::OpenAi => { check_unset(&revision, "revision", inferred_source, name)?; - check_unset(&dimensions, "dimensions", inferred_source, name)?; if let Setting::Set(model) = &model { - crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or( - crate::error::UserError::InvalidOpenAiModel { + let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str()) + .ok_or(crate::error::UserError::InvalidOpenAiModel { embedder_name: name.to_owned(), model: model.clone(), - }, - )?; + })?; + if let Setting::Set(dimensions) = dimensions { + if !model.supports_overriding_dimensions() + && dimensions != model.default_dimensions() + { + return Err(crate::error::UserError::InvalidOpenAiModelDimensions { + embedder_name: name.to_owned(), + model: model.name(), + dimensions, + expected_dimensions: model.default_dimensions(), + } + .into()); + } + if dimensions > model.default_dimensions() { + return Err(crate::error::UserError::InvalidOpenAiModelDimensionsMax { + embedder_name: name.to_owned(), + model: model.name(), + dimensions, + max_dimensions: model.default_dimensions(), + } + .into()); + } + } } } EmbedderSource::HuggingFace => { diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 544bea224..99c6c815e 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -39,7 +39,12 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> { } } - #[logging_timer::time("WordPrefixDocids::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "word_prefix_docids" + )] pub fn execute( self, mut new_word_docids_iter: grenad::ReaderCursor, diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index 819cc097b..a05eb8721 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -4,7 +4,7 @@ use std::str; use 
grenad::CompressionType; use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, Database}; -use log::debug; +use tracing::debug; use crate::error::SerializationError; use crate::heed_codec::StrBEU16Codec; @@ -44,7 +44,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { } } - #[logging_timer::time("WordPrefixIntegerDocids::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "words_prefix_integer_docids" + )] pub fn execute( self, new_word_integer_docids: grenad::Reader, diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs index f26bf93e5..bb1830727 100644 --- a/milli/src/update/words_prefixes_fst.rs +++ b/milli/src/update/words_prefixes_fst.rs @@ -38,7 +38,12 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> { self } - #[logging_timer::time("WordsPrefixesFst::{}")] + #[tracing::instrument( + level = "trace", + skip_all, + target = "indexing::prefix", + name = "words_prefix_fst" + )] pub fn execute(self) -> Result<()> { puffin::profile_function!(); diff --git a/milli/src/vector/hf.rs b/milli/src/vector/hf.rs index cdfdbfb75..04e169c71 100644 --- a/milli/src/vector/hf.rs +++ b/milli/src/vector/hf.rs @@ -73,7 +73,7 @@ impl Embedder { let device = match candle_core::Device::cuda_if_available(0) { Ok(device) => device, Err(error) => { - log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error); + tracing::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error); candle_core::Device::Cpu } }; diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index 524f83b80..cbddddfb7 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -17,6 +17,7 @@ pub struct Embedder { pub struct EmbedderOptions { pub api_key: Option, pub embedding_model: EmbeddingModel, + pub dimensions: Option, } #[derive( @@ -41,34 +42,50 @@ pub enum EmbeddingModel { #[serde(rename = "text-embedding-ada-002")] #[deserr(rename = "text-embedding-ada-002")] TextEmbeddingAda002, + + #[serde(rename = "text-embedding-3-small")] + #[deserr(rename = "text-embedding-3-small")] + TextEmbedding3Small, + + #[serde(rename = "text-embedding-3-large")] + #[deserr(rename = "text-embedding-3-large")] + TextEmbedding3Large, } impl EmbeddingModel { pub fn supported_models() -> &'static [&'static str] { - &["text-embedding-ada-002"] + &["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"] } pub fn max_token(&self) -> usize { match self { EmbeddingModel::TextEmbeddingAda002 => 8191, + EmbeddingModel::TextEmbedding3Large => 8191, + EmbeddingModel::TextEmbedding3Small => 8191, } } - pub fn dimensions(&self) -> usize { + pub fn default_dimensions(&self) -> usize { match self { EmbeddingModel::TextEmbeddingAda002 => 1536, + EmbeddingModel::TextEmbedding3Large => 3072, + EmbeddingModel::TextEmbedding3Small => 1536, } } pub fn name(&self) -> &'static str { match self { EmbeddingModel::TextEmbeddingAda002 => "text-embedding-ada-002", + EmbeddingModel::TextEmbedding3Large => "text-embedding-3-large", + EmbeddingModel::TextEmbedding3Small => "text-embedding-3-small", } } pub fn from_name(name: &str) -> Option { match name { "text-embedding-ada-002" => Some(EmbeddingModel::TextEmbeddingAda002), + "text-embedding-3-large" => Some(EmbeddingModel::TextEmbedding3Large), + "text-embedding-3-small" => Some(EmbeddingModel::TextEmbedding3Small), _ => None, } } @@ -78,6 +95,20 @@ impl EmbeddingModel { EmbeddingModel::TextEmbeddingAda002 => { 
Some(DistributionShift { current_mean: 0.90, current_sigma: 0.08 }) } + EmbeddingModel::TextEmbedding3Large => { + Some(DistributionShift { current_mean: 0.70, current_sigma: 0.1 }) + } + EmbeddingModel::TextEmbedding3Small => { + Some(DistributionShift { current_mean: 0.75, current_sigma: 0.1 }) + } + } + } + + pub fn supports_overriding_dimensions(&self) -> bool { + match self { + EmbeddingModel::TextEmbeddingAda002 => false, + EmbeddingModel::TextEmbedding3Large => true, + EmbeddingModel::TextEmbedding3Small => true, } } } @@ -86,11 +117,11 @@ pub const OPENAI_EMBEDDINGS_URL: &str = "https://api.openai.com/v1/embeddings"; impl EmbedderOptions { pub fn with_default_model(api_key: Option) -> Self { - Self { api_key, embedding_model: Default::default() } + Self { api_key, embedding_model: Default::default(), dimensions: None } } pub fn with_embedding_model(api_key: Option, embedding_model: EmbeddingModel) -> Self { - Self { api_key, embedding_model } + Self { api_key, embedding_model, dimensions: None } } } @@ -142,12 +173,16 @@ impl Embedder { let retry_duration = match result { Ok(embeddings) => return Ok(embeddings), Err(retry) => { - log::warn!("Failed: {}", retry.error); + tracing::warn!("Failed: {}", retry.error); tokenized |= retry.must_tokenize(); retry.into_duration(attempt) } }?; - log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis()); + tracing::warn!( + "Attempt #{}, retrying after {}ms.", + attempt, + retry_duration.as_millis() + ); tokio::time::sleep(retry_duration).await; } @@ -213,7 +248,7 @@ impl Embedder { .map_err(EmbedError::openai_unexpected) .map_err(Retry::retry_later)?; - log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt."); + tracing::warn!("OpenAI: input was too long, retrying on tokenized version. 
For best performance, limit the size of your prompt."); return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens( error_response.error, @@ -235,9 +270,13 @@ impl Embedder { client: &reqwest::Client, ) -> Result>, Retry> { for text in texts { - log::trace!("Received prompt: {}", text.as_ref()) + tracing::trace!("Received prompt: {}", text.as_ref()) } - let request = OpenAiRequest { model: self.options.embedding_model.name(), input: texts }; + let request = OpenAiRequest { + model: self.options.embedding_model.name(), + input: texts, + dimensions: self.overriden_dimensions(), + }; let response = client .post(OPENAI_EMBEDDINGS_URL) .json(&request) @@ -254,7 +293,7 @@ impl Embedder { .map_err(EmbedError::openai_unexpected) .map_err(Retry::retry_later)?; - log::trace!("response: {:?}", response.data); + tracing::trace!("response: {:?}", response.data); Ok(response .data @@ -280,8 +319,7 @@ impl Embedder { } let mut tokens = encoded.as_slice(); - let mut embeddings_for_prompt = - Embeddings::new(self.options.embedding_model.dimensions()); + let mut embeddings_for_prompt = Embeddings::new(self.dimensions()); while tokens.len() > max_token_count { let window = &tokens[..max_token_count]; embeddings_for_prompt.push(self.embed_tokens(window, client).await?).unwrap(); @@ -322,8 +360,11 @@ impl Embedder { tokens: &[usize], client: &reqwest::Client, ) -> Result { - let request = - OpenAiTokensRequest { model: self.options.embedding_model.name(), input: tokens }; + let request = OpenAiTokensRequest { + model: self.options.embedding_model.name(), + input: tokens, + dimensions: self.overriden_dimensions(), + }; let response = client .post(OPENAI_EMBEDDINGS_URL) .json(&request) @@ -366,12 +407,24 @@ impl Embedder { } pub fn dimensions(&self) -> usize { - self.options.embedding_model.dimensions() + if self.options.embedding_model.supports_overriding_dimensions() { + self.options.dimensions.unwrap_or(self.options.embedding_model.default_dimensions()) + } else { + self.options.embedding_model.default_dimensions() + } } pub fn distribution(&self) -> Option { self.options.embedding_model.distribution() } + + fn overriden_dimensions(&self) -> Option { + if self.options.embedding_model.supports_overriding_dimensions() { + self.options.dimensions + } else { + None + } + } } // retrying in case of failure @@ -431,12 +484,16 @@ impl Retry { struct OpenAiRequest<'a, S: AsRef + serde::Serialize> { model: &'a str, input: &'a [S], + #[serde(skip_serializing_if = "Option::is_none")] + dimensions: Option, } #[derive(Debug, Serialize)] struct OpenAiTokensRequest<'a> { model: &'a str, input: &'a [usize], + #[serde(skip_serializing_if = "Option::is_none")] + dimensions: Option, } #[derive(Debug, Deserialize)] diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 37fb80452..834a1c81d 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -1,6 +1,7 @@ use deserr::Deserr; use serde::{Deserialize, Serialize}; +use super::openai; use crate::prompt::PromptData; use crate::update::Setting; use crate::vector::EmbeddingConfig; @@ -82,7 +83,7 @@ impl EmbeddingSettings { Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], Self::REVISION => &[EmbedderSource::HuggingFace], Self::API_KEY => &[EmbedderSource::OpenAi], - Self::DIMENSIONS => &[EmbedderSource::UserProvided], + Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided], Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi], _other => 
unreachable!("unknown field"), } @@ -90,9 +91,13 @@ impl EmbeddingSettings { pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] { match source { - EmbedderSource::OpenAi => { - &[Self::SOURCE, Self::MODEL, Self::API_KEY, Self::DOCUMENT_TEMPLATE] - } + EmbedderSource::OpenAi => &[ + Self::SOURCE, + Self::MODEL, + Self::API_KEY, + Self::DOCUMENT_TEMPLATE, + Self::DIMENSIONS, + ], EmbedderSource::HuggingFace => { &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE] } @@ -109,6 +114,17 @@ impl EmbeddingSettings { *source = Setting::Set(EmbedderSource::default()) } } + + pub(crate) fn apply_default_openai_model(setting: &mut Setting) { + if let Setting::Set(EmbeddingSettings { + source: Setting::Set(EmbedderSource::OpenAi), + model: model @ (Setting::NotSet | Setting::Reset), + .. + }) = setting + { + *model = Setting::Set(openai::EmbeddingModel::default().name().to_owned()) + } + } } #[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)] @@ -176,7 +192,7 @@ impl From for EmbeddingSettings { model: Setting::Set(options.embedding_model.name().to_owned()), revision: Setting::NotSet, api_key: options.api_key.map(Setting::Set).unwrap_or_default(), - dimensions: Setting::NotSet, + dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(), document_template: Setting::Set(prompt.template), }, super::EmbedderOptions::UserProvided(options) => Self { @@ -208,6 +224,9 @@ impl From for EmbeddingConfig { if let Some(api_key) = api_key.set() { options.api_key = Some(api_key); } + if let Some(dimensions) = dimensions.set() { + options.dimensions = Some(dimensions); + } this.embedder_options = super::EmbedderOptions::OpenAi(options); } EmbedderSource::HuggingFace => { diff --git a/tracing-trace/Cargo.toml b/tracing-trace/Cargo.toml new file mode 100644 index 000000000..64848bff9 --- /dev/null +++ b/tracing-trace/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "tracing-trace" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +color-spantrace = "0.2.1" +fxprof-processed-profile = "0.6.0" +serde = { version = "1.0.195", features = ["derive"] } +serde_json = "1.0.111" +tracing = "0.1.40" +tracing-error = "0.2.0" +tracing-subscriber = "0.3.18" +byte-unit = { version = "4.0.19", default-features = false, features = [ + "std", + "serde", +] } +tokio = { version = "1.35.1", features = ["sync"] } + +[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies] +libproc = "0.14.2" diff --git a/tracing-trace/src/bin/trace-to-callstats.rs b/tracing-trace/src/bin/trace-to-callstats.rs new file mode 100644 index 000000000..631cdbb45 --- /dev/null +++ b/tracing-trace/src/bin/trace-to-callstats.rs @@ -0,0 +1,21 @@ +use std::ffi::OsString; +use std::io::Write; + +use serde_json::json; + +fn main() { + let input_file = std::env::args_os().nth(1).expect("missing file"); + let input = + std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open ")); + let trace = tracing_trace::TraceReader::new(input); + let profile = tracing_trace::processor::span_stats::to_call_stats(trace).unwrap(); + let mut output_file = OsString::new(); + output_file.push("callstats-"); + output_file.push(input_file); + let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap()); + for (key, value) in profile { + serde_json::to_writer(&mut output_file, &json!({key: value})).unwrap(); + writeln!(&mut 
output_file).unwrap(); + } + output_file.flush().unwrap(); +} diff --git a/tracing-trace/src/bin/trace-to-firefox.rs b/tracing-trace/src/bin/trace-to-firefox.rs new file mode 100644 index 000000000..21adff41d --- /dev/null +++ b/tracing-trace/src/bin/trace-to-firefox.rs @@ -0,0 +1,18 @@ +use std::ffi::OsString; +use std::io::Write; + +fn main() { + let input_file = std::env::args_os().nth(1).expect("missing file"); + let input = + std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open ")); + let trace = tracing_trace::TraceReader::new(input); + let profile = + tracing_trace::processor::firefox_profiler::to_firefox_profile(trace, "Meilisearch") + .unwrap(); + let mut output_file = OsString::new(); + output_file.push("firefox-"); + output_file.push(input_file); + let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap()); + serde_json::to_writer(&mut output_file, &profile).unwrap(); + output_file.flush().unwrap(); +} diff --git a/tracing-trace/src/entry.rs b/tracing-trace/src/entry.rs new file mode 100644 index 000000000..26e543ba0 --- /dev/null +++ b/tracing-trace/src/entry.rs @@ -0,0 +1,141 @@ +use std::borrow::Cow; + +use serde::{Deserialize, Serialize}; +use tracing::span::Id as TracingId; + +#[derive(Debug, Serialize, Deserialize)] +pub enum Entry { + /// A code location was accessed for the first time + NewCallsite(NewCallsite), + + /// A new thread was accessed + NewThread(NewThread), + + /// A new call started + NewSpan(NewSpan), + + /// An already in-flight call started doing work. + /// + /// For synchronous functions, open should always be followed immediately by enter, exit and close, + /// but for asynchronous functions, work can suspend (exiting the span without closing it), and then + /// later resume (entering the span again without opening it). + /// + /// The timer for a span only starts when the span is entered. + SpanEnter(SpanEnter), + + /// An in-flight call suspended and paused work. + /// + /// For synchronous functions, exit should always be followed immediately by close, + /// but for asynchronous functions, work can suspend and then later resume. + /// + /// The timer for a span pauses when the span is exited. 
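In other words, a synchronous call produces the sequence NewSpan, SpanEnter, SpanExit, SpanClose, while an asynchronous one may repeat the enter/exit pair each time the future is polled. A processor therefore derives a span's active time from the matching enter/exit timestamps rather than from NewSpan and SpanClose; a minimal sketch of that bookkeeping (not code from this crate):

    use std::time::Duration;

    // Active time is the sum of (exit - enter) intervals, which also covers
    // async spans that are entered and exited several times before closing.
    fn span_active_time(enters: &[Duration], exits: &[Duration]) -> Duration {
        enters.iter().zip(exits).map(|(enter, exit)| *exit - *enter).sum()
    }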
+ SpanExit(SpanExit), + + /// A call ended + SpanClose(SpanClose), + + /// An event occurred + Event(Event), +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct SpanId(u64); + +impl From<&TracingId> for SpanId { + fn from(value: &TracingId) -> Self { + Self(value.into_u64()) + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct NewCallsite { + pub call_id: ResourceId, + pub name: Cow<'static, str>, + pub module_path: Option>, + pub file: Option>, + pub line: Option, + pub target: Cow<'static, str>, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct NewThread { + pub thread_id: ResourceId, + pub name: Option, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct SpanEnter { + pub id: SpanId, + pub time: std::time::Duration, + pub memory: Option, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct SpanExit { + pub id: SpanId, + pub time: std::time::Duration, + pub memory: Option, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct Event { + pub call_id: ResourceId, + pub thread_id: ResourceId, + pub parent_id: Option, + pub time: std::time::Duration, + pub memory: Option, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct NewSpan { + pub id: SpanId, + pub call_id: ResourceId, + pub parent_id: Option, + pub thread_id: ResourceId, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct SpanClose { + pub id: SpanId, + pub time: std::time::Duration, +} + +/// A struct with a memory allocation stat. +#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] +pub struct MemoryStats { + /// Resident set size, measured in bytes. + /// (same as VmRSS in /proc//status). + pub resident: u64, +} + +impl MemoryStats { + #[cfg(any(target_os = "linux", target_os = "macos"))] + pub fn fetch() -> Option { + use libproc::libproc::pid_rusage::{pidrusage, RUsageInfoV0}; + + match pidrusage(std::process::id() as i32) { + Ok(RUsageInfoV0 { ri_resident_size, .. }) => { + Some(MemoryStats { resident: ri_resident_size }) + } + Err(_) => None, /* ignoring error to avoid spamming */ + } + } + + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + pub fn fetch() -> Option { + None + } + + pub fn checked_sub(self, other: Self) -> Option { + Some(Self { resident: self.resident.checked_sub(other.resident)? 
}) + } +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)] +pub struct ResourceId(pub(crate) usize); + +impl ResourceId { + pub fn to_usize(self) -> usize { + self.0 + } +} diff --git a/tracing-trace/src/error.rs b/tracing-trace/src/error.rs new file mode 100644 index 000000000..831da1e9d --- /dev/null +++ b/tracing-trace/src/error.rs @@ -0,0 +1,21 @@ +#[derive(Debug)] +pub enum Error { + Json(serde_json::Error), +} + +impl std::error::Error for Error {} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("error de/serializing trace entry:")?; + match self { + Error::Json(error) => std::fmt::Display::fmt(&error, f), + } + } +} + +impl From for Error { + fn from(value: serde_json::Error) -> Self { + Self::Json(value) + } +} diff --git a/tracing-trace/src/layer.rs b/tracing-trace/src/layer.rs new file mode 100644 index 000000000..f2f1d64ae --- /dev/null +++ b/tracing-trace/src/layer.rs @@ -0,0 +1,208 @@ +use std::borrow::Cow; +use std::collections::HashMap; +use std::io::Write; +use std::ops::ControlFlow; +use std::sync::RwLock; + +use tracing::span::{Attributes, Id as TracingId}; +use tracing::{Metadata, Subscriber}; +use tracing_subscriber::layer::Context; +use tracing_subscriber::Layer; + +use crate::entry::{ + Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter, + SpanExit, SpanId, +}; +use crate::{Error, Trace, TraceWriter}; + +/// Layer that measures the time spent in spans. +pub struct TraceLayer { + sender: tokio::sync::mpsc::UnboundedSender, + callsites: RwLock>, + start_time: std::time::Instant, + profile_memory: bool, +} + +impl Trace { + pub fn new(profile_memory: bool) -> (Self, TraceLayer) { + let (sender, receiver) = tokio::sync::mpsc::unbounded_channel(); + let trace = Trace { receiver }; + let layer = TraceLayer { + sender, + callsites: Default::default(), + start_time: std::time::Instant::now(), + profile_memory, + }; + (trace, layer) + } +} + +impl TraceWriter { + pub fn new(writer: W, profile_memory: bool) -> (Self, TraceLayer) { + let (trace, layer) = Trace::new(profile_memory); + (trace.into_writer(writer), layer) + } + + pub async fn receive(&mut self) -> Result, Error> { + let Some(entry) = self.receiver.recv().await else { + return Ok(ControlFlow::Break(())); + }; + self.write(entry)?; + Ok(ControlFlow::Continue(())) + } + + /// Panics if called from an asynchronous context + pub fn blocking_receive(&mut self) -> Result, Error> { + let Some(entry) = self.receiver.blocking_recv() else { + return Ok(ControlFlow::Break(())); + }; + self.write(entry)?; + Ok(ControlFlow::Continue(())) + } + + pub fn write(&mut self, entry: Entry) -> Result<(), Error> { + Ok(serde_json::ser::to_writer(&mut self.writer, &entry)?) 
+ } + + pub fn try_receive(&mut self) -> Result, Error> { + let Ok(entry) = self.receiver.try_recv() else { + return Ok(ControlFlow::Break(())); + }; + self.write(entry)?; + Ok(ControlFlow::Continue(())) + } + + pub fn flush(&mut self) -> Result<(), std::io::Error> { + self.writer.flush() + } +} + +#[derive(PartialEq, Eq, Hash)] +enum OpaqueIdentifier { + Thread(std::thread::ThreadId), + Call(tracing::callsite::Identifier), +} + +impl TraceLayer { + fn resource_id(&self, opaque: OpaqueIdentifier) -> Option { + self.callsites.read().unwrap().get(&opaque).copied() + } + + fn register_resource_id(&self, opaque: OpaqueIdentifier) -> ResourceId { + let mut map = self.callsites.write().unwrap(); + let len = map.len(); + *map.entry(opaque).or_insert(ResourceId(len)) + } + + fn elapsed(&self) -> std::time::Duration { + self.start_time.elapsed() + } + + fn memory_stats(&self) -> Option { + if self.profile_memory { + MemoryStats::fetch() + } else { + None + } + } + + fn send(&self, entry: Entry) { + // we never care that the other end hanged on us + let _ = self.sender.send(entry); + } + + fn register_callsite(&self, metadata: &'static Metadata<'static>) -> ResourceId { + let call_id = self.register_resource_id(OpaqueIdentifier::Call(metadata.callsite())); + + let module_path = metadata.module_path(); + let file = metadata.file(); + let line = metadata.line(); + let name = metadata.name(); + let target = metadata.target(); + + self.send(Entry::NewCallsite(NewCallsite { + call_id, + module_path: module_path.map(Cow::Borrowed), + file: file.map(Cow::Borrowed), + line, + name: Cow::Borrowed(name), + target: Cow::Borrowed(target), + })); + call_id + } + + fn register_thread(&self) -> ResourceId { + let thread_id = std::thread::current().id(); + let name = std::thread::current().name().map(ToOwned::to_owned); + let thread_id = self.register_resource_id(OpaqueIdentifier::Thread(thread_id)); + self.send(Entry::NewThread(NewThread { thread_id, name })); + thread_id + } +} + +impl Layer for TraceLayer +where + S: Subscriber, +{ + fn on_new_span(&self, attrs: &Attributes<'_>, id: &TracingId, _ctx: Context<'_, S>) { + let call_id = self + .resource_id(OpaqueIdentifier::Call(attrs.metadata().callsite())) + .unwrap_or_else(|| self.register_callsite(attrs.metadata())); + + let thread_id = self + .resource_id(OpaqueIdentifier::Thread(std::thread::current().id())) + .unwrap_or_else(|| self.register_thread()); + + let parent_id = attrs + .parent() + .cloned() + .or_else(|| tracing::Span::current().id()) + .map(|id| SpanId::from(&id)); + + self.send(Entry::NewSpan(NewSpan { id: id.into(), call_id, parent_id, thread_id })); + } + + fn on_enter(&self, id: &TracingId, _ctx: Context<'_, S>) { + self.send(Entry::SpanEnter(SpanEnter { + id: id.into(), + time: self.elapsed(), + memory: self.memory_stats(), + })) + } + + fn on_exit(&self, id: &TracingId, _ctx: Context<'_, S>) { + self.send(Entry::SpanExit(SpanExit { + id: id.into(), + time: self.elapsed(), + memory: self.memory_stats(), + })) + } + + fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) { + let call_id = self + .resource_id(OpaqueIdentifier::Call(event.metadata().callsite())) + .unwrap_or_else(|| self.register_callsite(event.metadata())); + + let thread_id = self + .resource_id(OpaqueIdentifier::Thread(std::thread::current().id())) + .unwrap_or_else(|| self.register_thread()); + + let parent_id = event + .parent() + .cloned() + .or_else(|| tracing::Span::current().id()) + .map(|id| SpanId::from(&id)); + + self.send(Entry::Event(Event { + 
call_id, + thread_id, + parent_id, + time: self.elapsed(), + memory: self.memory_stats(), + })) + } + + fn on_close(&self, id: TracingId, _ctx: Context<'_, S>) { + self.send(Entry::SpanClose(SpanClose { id: Into::into(&id), time: self.elapsed() })) + } +} diff --git a/tracing-trace/src/lib.rs b/tracing-trace/src/lib.rs new file mode 100644 index 000000000..77eb61d47 --- /dev/null +++ b/tracing-trace/src/lib.rs @@ -0,0 +1,54 @@ +use std::io::{Read, Write}; + +use entry::Entry; + +pub mod entry; +mod error; +pub mod layer; +pub mod processor; + +pub use error::Error; + +pub struct TraceWriter { + writer: W, + receiver: tokio::sync::mpsc::UnboundedReceiver, +} + +pub struct Trace { + receiver: tokio::sync::mpsc::UnboundedReceiver, +} + +impl Trace { + pub fn into_receiver(self) -> tokio::sync::mpsc::UnboundedReceiver { + self.receiver + } + + pub fn into_writer(self, writer: W) -> TraceWriter { + TraceWriter { writer, receiver: self.receiver } + } +} + +pub struct TraceReader { + reader: R, +} + +impl TraceReader { + pub fn new(reader: R) -> Self { + Self { reader } + } + + fn read(&mut self) -> Option> { + serde_json::Deserializer::from_reader(&mut self.reader) + .into_iter() + .next() + .map(|res| res.map_err(Into::into)) + } +} + +impl Iterator for TraceReader { + type Item = Result; + + fn next(&mut self) -> Option { + self.read() + } +} diff --git a/tracing-trace/src/main.rs b/tracing-trace/src/main.rs new file mode 100644 index 000000000..c2e4f08a7 --- /dev/null +++ b/tracing-trace/src/main.rs @@ -0,0 +1,122 @@ +use tracing::{instrument, Span}; +use tracing_error::{ErrorLayer, InstrumentResult, SpanTrace, TracedError}; + +#[instrument(level = "trace", target = "profile::indexing")] +fn foo() -> Result<(), TracedError> { + let _ = bar(40, 2); + bar(40, 2) +} + +#[derive(Debug)] +pub enum Error { + XTooBig, +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("x too big") + } +} + +impl std::error::Error for Error {} + +#[instrument(level = "trace", target = "profile::indexing")] +fn bar(x: u32, y: u32) -> Result<(), TracedError> { + let handle_ok = spawn_in_current_scope(move || baz(y)); + let handle = spawn_in_current_scope(move || baz(x + y)); + handle_ok.join().unwrap().and(handle.join().unwrap()) +} + +pub fn spawn_in_current_scope(f: F) -> std::thread::JoinHandle +where + F: FnOnce() -> T + Send + 'static, + T: Send + 'static, +{ + let current = Span::current(); + std::thread::spawn(move || { + let span = tracing::trace_span!(parent: ¤t, "thread_spawn", id = ?std::thread::current().id(), name = tracing::field::Empty); + if let Some(name) = std::thread::current().name() { + span.record("name", name); + } + span.in_scope(f) + }) +} + +#[instrument(level = "trace", target = "profile::indexing")] +fn baz(x: u32) -> Result<(), TracedError> { + if x > 10 { + fibo_recursive(10); + return Err(Error::XTooBig).in_current_span(); + } + Ok(()) +} + +#[instrument(level = "trace", target = "profile::indexing")] +fn fibo_recursive(n: u32) -> u32 { + if n == 0 { + return 1; + } + if n == 1 { + return 2; + } + return fibo_recursive(n - 1) - fibo_recursive(n - 2); +} + +use tracing_error::ExtractSpanTrace as _; +use tracing_subscriber::layer::SubscriberExt as _; +use tracing_trace::processor; + +fn on_panic(info: &std::panic::PanicInfo) { + let info = info.to_string(); + let trace = SpanTrace::capture(); + tracing::error!(%info, %trace); +} + +fn main() { + let (mut trace, profiling_layer) = + 
tracing_trace::TraceWriter::new(std::fs::File::create("trace.json").unwrap(), true); + + let subscriber = tracing_subscriber::registry() + // any number of other subscriber layers may be added before or + // after the `ErrorLayer`... + .with(ErrorLayer::default()) + .with(profiling_layer); + + // set the subscriber as the default for the application + tracing::subscriber::set_global_default(subscriber).unwrap(); + + std::panic::set_hook(Box::new(on_panic)); + + let res = foo(); + + if let Err(error) = res { + print_extracted_spantraces(&error) + } + + while trace.try_receive().unwrap().is_continue() {} + + trace.flush().unwrap(); + + let trace = tracing_trace::TraceReader::new(std::fs::File::open("trace.json").unwrap()); + + let profile = processor::firefox_profiler::to_firefox_profile(trace, "test").unwrap(); + serde_json::to_writer(std::fs::File::create("processed.json").unwrap(), &profile).unwrap(); +} + +fn print_extracted_spantraces(error: &(dyn std::error::Error + 'static)) { + let mut error = Some(error); + let mut ind = 0; + + eprintln!("Error:"); + + while let Some(err) = error { + if let Some(spantrace) = err.span_trace() { + eprintln!("found a spantrace:\n{}", color_spantrace::colorize(spantrace)); + } else { + eprintln!("{:>4}: {}", ind, err); + } + + error = err.source(); + ind += 1; + } +} diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/tracing-trace/src/processor/firefox_profiler.rs new file mode 100644 index 000000000..bae8ea44a --- /dev/null +++ b/tracing-trace/src/processor/firefox_profiler.rs @@ -0,0 +1,450 @@ +use std::collections::HashMap; + +use fxprof_processed_profile::{ + CategoryPairHandle, CounterHandle, CpuDelta, Frame, FrameFlags, FrameInfo, MarkerDynamicField, + MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, ProcessHandle, Profile, + ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp, +}; +use serde_json::json; + +use crate::entry::{ + Entry, Event, MemoryStats, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit, + SpanId, +}; +use crate::{Error, TraceReader}; + +pub fn to_firefox_profile( + trace: TraceReader, + app: &str, +) -> Result { + let mut profile = Profile::new( + app, + ReferenceTimestamp::from_millis_since_unix_epoch(0.0), + SamplingInterval::from_nanos(15), + ); + + let mut last_timestamp = Timestamp::from_nanos_since_reference(0); + let main = profile.add_process(app, 0, last_timestamp); + + let mut calls = HashMap::new(); + let mut threads = HashMap::new(); + let mut spans = HashMap::new(); + + let category = profile.add_category("general", fxprof_processed_profile::CategoryColor::Blue); + let subcategory = profile.add_subcategory(category, "subcategory"); + + let mut last_memory = MemoryStats::default(); + + let mut memory_counters = None; + + for entry in trace { + let entry = entry?; + match entry { + Entry::NewCallsite(callsite) => { + let string_handle = profile.intern_string(callsite.name.as_ref()); + calls.insert(callsite.call_id, (callsite, string_handle)); + } + Entry::NewThread(thread) => { + let thread_handle = profile.add_thread( + main, + thread.thread_id.to_usize() as u32, + last_timestamp, + threads.is_empty(), + ); + if let Some(name) = &thread.name { + profile.set_thread_name(thread_handle, name) + } + threads.insert(thread.thread_id, thread_handle); + } + Entry::NewSpan(span) => { + spans.insert(span.id, (span, SpanStatus::Outside)); + } + Entry::SpanEnter(SpanEnter { id, time, memory }) => { + let (_span, status) = spans.get_mut(&id).unwrap(); + + let 
SpanStatus::Outside = status else { + continue; + }; + + *status = SpanStatus::Inside { time, memory }; + + last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); + + add_memory_samples( + &mut profile, + main, + memory, + last_timestamp, + &mut memory_counters, + &mut last_memory, + ); + } + Entry::SpanExit(SpanExit { id, time, memory }) => { + let (span, status) = spans.get_mut(&id).unwrap(); + + let SpanStatus::Inside { time: begin, memory: begin_memory } = status else { + continue; + }; + last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64); + + let begin = *begin; + let begin_memory = *begin_memory; + + *status = SpanStatus::Outside; + + let span = *span; + let thread_handle = threads.get(&span.thread_id).unwrap(); + + let frames = make_frames(span, &spans, &calls, subcategory); + + profile.add_sample( + *thread_handle, + to_timestamp(begin), + frames.iter().rev().cloned(), + CpuDelta::ZERO, + 1, + ); + profile.add_sample( + *thread_handle, + to_timestamp(time), + frames.iter().rev().cloned(), + CpuDelta::from_nanos((time - begin).as_nanos() as u64), + 1, + ); + + add_memory_samples( + &mut profile, + main, + memory, + last_timestamp, + &mut memory_counters, + &mut last_memory, + ); + + let (callsite, _) = calls.get(&span.call_id).unwrap(); + + let memory_delta = + begin_memory.zip(memory).and_then(|(begin, end)| end.checked_sub(begin)); + let marker = SpanMarker { callsite, span: &span, memory_delta }; + + profile.add_marker_with_stack( + *thread_handle, + &callsite.name, + marker, + fxprof_processed_profile::MarkerTiming::Interval( + to_timestamp(begin), + to_timestamp(time), + ), + frames.iter().rev().cloned(), + ) + } + Entry::Event(event) => { + let span = event + .parent_id + .as_ref() + .and_then(|parent_id| spans.get(parent_id)) + .and_then(|(span, status)| match status { + SpanStatus::Outside => None, + SpanStatus::Inside { .. 
} => Some(span), + }) + .copied(); + let timestamp = to_timestamp(event.time); + + let thread_handle = threads.get(&event.thread_id).unwrap(); + + let frames = span + .map(|span| make_frames(span, &spans, &calls, subcategory)) + .unwrap_or_default(); + + profile.add_sample( + *thread_handle, + timestamp, + frames.iter().rev().cloned(), + CpuDelta::ZERO, + 1, + ); + + let memory_delta = add_memory_samples( + &mut profile, + main, + event.memory, + last_timestamp, + &mut memory_counters, + &mut last_memory, + ); + + let (callsite, _) = calls.get(&event.call_id).unwrap(); + + let marker = EventMarker { callsite, event: &event, memory_delta }; + + profile.add_marker_with_stack( + *thread_handle, + &callsite.name, + marker, + fxprof_processed_profile::MarkerTiming::Instant(timestamp), + frames.iter().rev().cloned(), + ); + + last_timestamp = timestamp; + } + Entry::SpanClose(SpanClose { id, time }) => { + spans.remove(&id); + last_timestamp = to_timestamp(time); + } + } + } + + Ok(profile) +} + +struct MemoryCounterHandles { + usage: CounterHandle, +} + +impl MemoryCounterHandles { + fn new(profile: &mut Profile, main: ProcessHandle) -> Self { + let usage = + profile.add_counter(main, "mimmalloc", "Memory", "Amount of memory currently in use"); + Self { usage } + } +} + +fn add_memory_samples( + profile: &mut Profile, + main: ProcessHandle, + memory: Option, + last_timestamp: Timestamp, + memory_counters: &mut Option, + last_memory: &mut MemoryStats, +) -> Option { + let Some(stats) = memory else { + return None; + }; + + let memory_counters = + memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main)); + + profile.add_counter_sample( + memory_counters.usage, + last_timestamp, + stats.resident as f64 - last_memory.resident as f64, + 0, + ); + + let delta = stats.checked_sub(*last_memory); + *last_memory = stats; + delta +} + +fn to_timestamp(time: std::time::Duration) -> Timestamp { + Timestamp::from_nanos_since_reference(time.as_nanos() as u64) +} + +fn make_frames( + span: NewSpan, + spans: &HashMap, + calls: &HashMap, + subcategory: CategoryPairHandle, +) -> Vec { + let mut frames = Vec::new(); + let mut current_span = span; + loop { + let frame = make_frame(current_span, calls, subcategory); + frames.push(frame); + if let Some(parent) = current_span.parent_id { + current_span = spans.get(&parent).unwrap().0; + } else { + break; + } + } + frames +} + +fn make_frame( + span: NewSpan, + calls: &HashMap, + subcategory: CategoryPairHandle, +) -> FrameInfo { + let (_, call) = calls.get(&span.call_id).unwrap(); + FrameInfo { frame: Frame::Label(*call), category_pair: subcategory, flags: FrameFlags::empty() } +} + +#[derive(Debug, Clone, Copy)] +enum SpanStatus { + Outside, + Inside { time: std::time::Duration, memory: Option }, +} + +struct SpanMarker<'a> { + span: &'a NewSpan, + callsite: &'a NewCallsite, + memory_delta: Option, +} + +impl<'a> ProfilerMarker for SpanMarker<'a> { + const MARKER_TYPE_NAME: &'static str = "span"; + + fn schema() -> MarkerSchema { + let fields = vec![ + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "filename", + label: "File name", + format: MarkerFieldFormat::FilePath, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "line", + label: "Line", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "module_path", + label: "Module path", + format: MarkerFieldFormat::String, + searchable: true, + }), + 
MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "span_id", + label: "Span ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "thread_id", + label: "Thread ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "resident", + label: "Resident set size, measured in bytes while this function was executing", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), + ]; + + MarkerSchema { + type_name: Self::MARKER_TYPE_NAME, + locations: vec![ + MarkerLocation::MarkerTable, + MarkerLocation::MarkerChart, + MarkerLocation::TimelineOverview, + ], + chart_label: None, + tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"), + table_label: Some("{marker.data.filename}:{marker.data.line}"), + fields, + } + } + + fn json_marker_data(&self) -> serde_json::Value { + let filename = self.callsite.file.as_deref(); + let line = self.callsite.line; + let module_path = self.callsite.module_path.as_deref(); + let span_id = self.span.id; + let thread_id = self.span.thread_id; + + let mut value = json!({ + "type": Self::MARKER_TYPE_NAME, + "filename": filename, + "line": line, + "module_path": module_path, + "span_id": span_id, + "thread_id": thread_id, + }); + + if let Some(MemoryStats { resident }) = self.memory_delta { + value["resident"] = json!(resident); + } + + value + } +} + +struct EventMarker<'a> { + event: &'a Event, + callsite: &'a NewCallsite, + memory_delta: Option, +} + +impl<'a> ProfilerMarker for EventMarker<'a> { + const MARKER_TYPE_NAME: &'static str = "tracing-event"; + + fn schema() -> MarkerSchema { + let fields = vec![ + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "filename", + label: "File name", + format: MarkerFieldFormat::FilePath, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "line", + label: "Line", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "module_path", + label: "Module path", + format: MarkerFieldFormat::String, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "parent_span_id", + label: "Parent Span ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "thread_id", + label: "Thread ID", + format: MarkerFieldFormat::Integer, + searchable: true, + }), + MarkerSchemaField::Dynamic(MarkerDynamicField { + key: "resident", + label: "Resident set size, measured in bytes while this function was executing", + format: MarkerFieldFormat::Bytes, + searchable: false, + }), + ]; + + MarkerSchema { + type_name: Self::MARKER_TYPE_NAME, + locations: vec![ + MarkerLocation::MarkerTable, + MarkerLocation::MarkerChart, + MarkerLocation::TimelineOverview, + ], + chart_label: None, + tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"), + table_label: Some("{marker.data.filename}:{marker.data.line}"), + fields, + } + } + + fn json_marker_data(&self) -> serde_json::Value { + let filename = self.callsite.file.as_deref(); + let line = self.callsite.line; + let module_path = self.callsite.module_path.as_deref(); + let span_id = self.event.parent_id; + let thread_id = self.event.thread_id; + + let mut value = json!({ + "type": Self::MARKER_TYPE_NAME, + "filename": filename, + "line": line, + "module_path": module_path, + "parent_span_id": span_id, + "thread_id": 
+        });
+
+        if let Some(MemoryStats { resident }) = self.memory_delta {
+            value["resident"] = json!(resident);
+        }
+
+        value
+    }
+}
diff --git a/tracing-trace/src/processor/fmt.rs b/tracing-trace/src/processor/fmt.rs
new file mode 100644
index 000000000..68e95c00c
--- /dev/null
+++ b/tracing-trace/src/processor/fmt.rs
@@ -0,0 +1,195 @@
+use std::collections::HashMap;
+use std::io::Read;
+
+use crate::entry::{
+    Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter,
+    SpanExit, SpanId,
+};
+use crate::{Error, TraceReader};
+
+#[derive(Debug, Clone, Copy)]
+enum SpanStatus {
+    Outside,
+    Inside(std::time::Duration),
+}
+
+pub fn print_trace<R: Read>(trace: TraceReader<R>) -> Result<(), Error> {
+    let mut calls = HashMap::new();
+    let mut threads = HashMap::new();
+    let mut spans = HashMap::new();
+    for entry in trace {
+        let entry = entry?;
+        match entry {
+            Entry::NewCallsite(callsite) => {
+                calls.insert(callsite.call_id, callsite);
+            }
+            Entry::NewThread(NewThread { thread_id, name }) => {
+                threads.insert(thread_id, name);
+            }
+            Entry::NewSpan(span) => {
+                spans.insert(span.id, (span, SpanStatus::Outside));
+            }
+            Entry::SpanEnter(SpanEnter { id, time, memory }) => {
+                let (span, status) = spans.get_mut(&id).unwrap();
+
+                let SpanStatus::Outside = status else {
+                    continue;
+                };
+
+                *status = SpanStatus::Inside(time);
+
+                let span = *span;
+
+                match memory {
+                    Some(stats) => println!(
+                        "[{}]{}::{} ({}) <-",
+                        print_thread(&threads, span.thread_id),
+                        print_backtrace(&spans, &calls, &span),
+                        print_span(&calls, &span),
+                        print_memory(stats),
+                    ),
+                    None => println!(
+                        "[{}]{}::{} <-",
+                        print_thread(&threads, span.thread_id),
+                        print_backtrace(&spans, &calls, &span),
+                        print_span(&calls, &span),
+                    ),
+                }
+            }
+            Entry::SpanExit(SpanExit { id, time, memory }) => {
+                let (span, status) = spans.get_mut(&id).unwrap();
+
+                let SpanStatus::Inside(begin) = status else {
+                    continue;
+                };
+                let begin = *begin;
+
+                *status = SpanStatus::Outside;
+
+                let span = *span;
+
+                match memory {
+                    Some(stats) => println!(
+                        "[{}]{}::{} ({}) -> {}",
+                        print_thread(&threads, span.thread_id),
+                        print_backtrace(&spans, &calls, &span),
+                        print_span(&calls, &span),
+                        print_memory(stats),
+                        print_duration(time - begin),
+                    ),
+                    None => println!(
+                        "[{}]{}::{} -> {}",
+                        print_thread(&threads, span.thread_id),
+                        print_backtrace(&spans, &calls, &span),
+                        print_span(&calls, &span),
+                        print_duration(time - begin),
+                    ),
+                }
+            }
+            Entry::SpanClose(SpanClose { id, time: _ }) => {
+                spans.remove(&id);
+            }
+            Entry::Event(Event { call_id, thread_id, parent_id, time: _, memory }) => {
+                let parent_span = parent_id.and_then(|parent_id| spans.get(&parent_id)).and_then(
+                    |(span, status)| match status {
+                        SpanStatus::Outside => None,
+                        SpanStatus::Inside(_) => Some(span),
+                    },
+                );
+                match (parent_span, memory) {
+                    (Some(parent_span), Some(stats)) => println!(
+                        "[{}]{}::{} ({}) event: {}",
+                        print_thread(&threads, thread_id),
+                        print_backtrace(&spans, &calls, parent_span),
+                        print_span(&calls, parent_span),
+                        print_memory(stats),
+                        print_call(&calls, call_id)
+                    ),
+                    (Some(parent_span), None) => println!(
+                        "[{}]{}::{} event: {}",
+                        print_thread(&threads, thread_id),
+                        print_backtrace(&spans, &calls, parent_span),
+                        print_span(&calls, parent_span),
+                        print_call(&calls, call_id)
+                    ),
+                    (None, None) => println!(
+                        "[{}] event: {}",
+                        print_thread(&threads, thread_id),
+                        print_call(&calls, call_id)
+                    ),
+                    (None, Some(stats)) => println!(
+                        "[{}] ({}) event: {}",
+                        print_thread(&threads, thread_id),
+                        print_memory(stats),
+                        print_call(&calls, call_id)
+                    ),
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
+fn print_thread(threads: &HashMap<ResourceId, Option<String>>, thread_id: ResourceId) -> String {
+    let thread = threads.get(&thread_id).unwrap();
+    let thread =
+        thread.as_ref().cloned().unwrap_or_else(|| format!("ThreadId({})", thread_id.to_usize()));
+    thread
+}
+
+fn print_backtrace(
+    spans: &HashMap<SpanId, (NewSpan, SpanStatus)>,
+    calls: &HashMap<ResourceId, NewCallsite>,
+    span: &NewSpan,
+) -> String {
+    let mut parents = Vec::new();
+    let mut current = span.parent_id;
+    while let Some(current_id) = &current {
+        let (span, _) = spans.get(current_id).unwrap();
+        let callsite = calls.get(&span.call_id).unwrap();
+        parents.push(callsite.name.clone());
+
+        current = span.parent_id;
+    }
+
+    let x: Vec<String> = parents.into_iter().rev().map(|x| x.to_string()).collect();
+    x.join("::")
+}
+
+fn print_span(calls: &HashMap<ResourceId, NewCallsite>, span: &NewSpan) -> String {
+    print_call(calls, span.call_id)
+}
+
+fn print_call(calls: &HashMap<ResourceId, NewCallsite>, call_id: ResourceId) -> String {
+    let callsite = calls.get(&call_id).unwrap();
+    match (callsite.file.clone(), callsite.line) {
+        (Some(file), None) => format!("{} ({})", callsite.name, file),
+        (Some(file), Some(line)) => format!("{} ({}:{})", callsite.name, file, line),
+        _ => callsite.name.to_string(),
+    }
+}
+
+fn print_duration(duration: std::time::Duration) -> String {
+    if duration.as_nanos() < 1000 {
+        format!("{}ns", duration.as_nanos())
+    } else if duration.as_micros() < 1000 {
+        format!("{}μs", duration.as_micros())
+    } else if duration.as_millis() < 1000 {
+        format!("{}ms", duration.as_millis())
+    } else if duration.as_secs() < 120 {
+        format!("{}s", duration.as_secs())
+    } else if duration.as_secs_f64() / 60.0 < 60.0 {
+        format!("{}min", duration.as_secs_f64() / 60.0)
+    } else if duration.as_secs_f64() / 3600.0 < 8.0 {
+        format!("{}h", duration.as_secs_f64() / 3600.0)
+    } else {
+        format!("{}d", duration.as_secs_f64() / 3600.0 / 24.0)
+    }
+}
+
+/// Formats the resident set size (RSS) in an appropriate binary unit (GiB, MiB, KiB or bytes).
+fn print_memory(MemoryStats { resident }: MemoryStats) -> String {
+    use byte_unit::Byte;
+    let rss_bytes = Byte::from_bytes(resident).get_appropriate_unit(true);
+    format!("RSS {rss_bytes:.2}")
+}
diff --git a/tracing-trace/src/processor/mod.rs b/tracing-trace/src/processor/mod.rs
new file mode 100644
index 000000000..ea445b0a5
--- /dev/null
+++ b/tracing-trace/src/processor/mod.rs
@@ -0,0 +1,3 @@
+pub mod firefox_profiler;
+pub mod fmt;
+pub mod span_stats;
diff --git a/tracing-trace/src/processor/span_stats.rs b/tracing-trace/src/processor/span_stats.rs
new file mode 100644
index 000000000..f3e6238ff
--- /dev/null
+++ b/tracing-trace/src/processor/span_stats.rs
@@ -0,0 +1,79 @@
+use std::collections::{BTreeMap, HashMap};
+use std::time::Duration;
+
+use serde::{Deserialize, Serialize};
+
+use crate::entry::{Entry, NewCallsite, SpanClose, SpanEnter, SpanExit};
+use crate::{Error, TraceReader};
+
+#[derive(Debug, Clone, Copy)]
+enum SpanStatus {
+    Outside,
+    Inside(std::time::Duration),
+}
+
+#[derive(Serialize, Deserialize)]
+pub struct CallStats {
+    pub call_count: usize,
+    pub time: u64,
+}
+
+pub fn to_call_stats<R: std::io::Read>(
+    trace: TraceReader<R>,
+) -> Result<BTreeMap<String, CallStats>, Error> {
+    let mut calls = HashMap::new();
+    let mut spans = HashMap::new();
+    for entry in trace {
+        let entry = entry?;
+        match entry {
+            Entry::NewCallsite(callsite) => {
+                calls.insert(callsite.call_id, (callsite, vec![]));
+            }
+            Entry::NewThread(_) => {}
+            Entry::NewSpan(span) => {
+                spans.insert(span.id, (span, SpanStatus::Outside));
+            }
+            Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => {
+                let (_, status) = spans.get_mut(&id).unwrap();
+
+                let SpanStatus::Outside = status else {
+                    continue;
+                };
+
+                *status = SpanStatus::Inside(time);
+            }
+            Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => {
+                let (span, status) = spans.get_mut(&id).unwrap();
+
+                let SpanStatus::Inside(begin) = status else {
+                    continue;
+                };
+                let begin = *begin;
+
+                *status = SpanStatus::Outside;
+
+                let span = *span;
+                let (_, call_list) = calls.get_mut(&span.call_id).unwrap();
+                call_list.push(end - begin);
+            }
+            Entry::SpanClose(SpanClose { id, time: _ }) => {
+                spans.remove(&id);
+            }
+            Entry::Event(_) => {}
+        }
+    }
+
+    Ok(calls
+        .into_iter()
+        .map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls)))
+        .collect())
+}
+
+fn site_to_string(call_site: NewCallsite) -> String {
+    format!("{}::{}", call_site.target, call_site.name)
+}
+fn calls_to_stats(calls: Vec<Duration>) -> CallStats {
+    let nb = calls.len();
+    let sum: Duration = calls.iter().sum();
+    CallStats { call_count: nb, time: sum.as_nanos() as u64 }
+}
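
For illustration, a minimal driver for the fmt and span_stats processors added above could look like the sketch below. This is only a sketch under a few assumptions: TraceReader is assumed to expose a `new` constructor over any `std::io::Read` (defined elsewhere in the crate and not shown in this excerpt), the trace file name `trace.dat` is hypothetical, and the caller is assumed to have `serde_json` available and an error type convertible to `Box<dyn std::error::Error>`.

use std::fs::File;
use std::io::BufReader;

use tracing_trace::processor::{fmt, span_stats};
use tracing_trace::TraceReader; // the reader lives in the crate root per `use crate::{Error, TraceReader};`

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // A trace is consumed by iterating over its entries, so each processor
    // gets its own reader over the (hypothetical) trace file.
    let reader = BufReader::new(File::open("trace.dat")?);
    // Human-readable dump of every span enter/exit and event.
    fmt::print_trace(TraceReader::new(reader))?; // `TraceReader::new` assumed, not shown in this excerpt

    let reader = BufReader::new(File::open("trace.dat")?);
    // Per-callsite call counts and cumulative time in nanoseconds; the map is
    // serializable because `CallStats` derives `Serialize`.
    let stats = span_stats::to_call_stats(TraceReader::new(reader))?;
    println!("{}", serde_json::to_string_pretty(&stats)?);
    Ok(())
}

The firefox_profiler processor would be driven the same way, except that its resulting Profile is presumably serialized to JSON so it can be loaded into the Firefox Profiler UI.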