4391: Tracing r=dureuill a=irevoire

# Pull Request

- [ ] Hide the parameters of the process batch
- [x] Make actix-web trace every call on every route
- [x] Remove all `env_logger`/`logs` dependencies
- [x] Be able to enable or disable the memory measurement using the `/logs` route parameters

See the following product discussion: https://github.com/orgs/meilisearch/discussions/721

Supersedes https://github.com/meilisearch/meilisearch/pull/4338

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4317

## What does this PR do?

Update the format of the logs from:
```
[2024-02-06T14:54:11Z INFO  actix_server::builder] starting 10 workers
```

to

```
2024-02-06T13:58:14.710803Z  INFO actix_server::builder: 200: starting 10 workers
```

First, run meilisearch with the route enabled. You can do it:
- At launch time, with the experimental CLI flag: `cargo run -- --experimental-enable-logs-route`
- Or at runtime, by sending the following payload:
```
curl \
  -X PATCH 'http://localhost:7700/experimental-features/' \
  -H 'Content-Type: application/json' \
  --data-binary '{
    "logsRoute": true
  }'
```
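
The diff also adds a `MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE` environment variable, so the route can be enabled without touching the CLI flags. A minimal sketch, assuming the variable takes a boolean value like the other `MEILI_EXPERIMENTAL_*` flags:

```
# Assumption: the variable accepts a boolean value, like the other MEILI_EXPERIMENTAL_* flags.
MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE=true cargo run
```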

Then gather data from meilisearch by calling, for example:
```
curl \
  -X POST http://localhost:7700/logs \
  -H 'Content-Type: application/json' \
  --data-binary '{
    "mode": "fmt",
    "target": "milli=trace"
  }'
```
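
This PR also instruments the index scheduler (`create_next_batch`, `process_batch`, `apply_index_operation`) with spans under the `indexing::scheduler` target, so you can narrow the stream to just those spans using the same payload shape; a small sketch:

```
# Sketch: restrict the stream to the scheduler spans added in this PR.
# The target syntax follows the `milli=trace` example above.
curl \
  -X POST http://localhost:7700/logs \
  -H 'Content-Type: application/json' \
  --data-binary '{
    "mode": "fmt",
    "target": "indexing::scheduler=trace"
  }'
```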

Once your operation is over, tell meilisearch to stop the route:
```
curl \
  -X DELETE http://localhost:7700/logs
```

----

If you're profiling code, the following command converts the output of the route into a format that the Firefox Profiler can understand.

```bash
cargo run --release --bin trace-to-firefox -- 2024-01-17_17:07:55-indexing-trace.json
```
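
The trace file given to `trace-to-firefox` is simply the captured output of the `/logs` stream. A sketch of producing one is below; note that the `"profile"` mode value and the output file name are assumptions (this description only demonstrates the `fmt` mode), so check which `mode` values the route actually accepts:

```
# Sketch: capture a profiler-compatible trace from the stream into a file.
# The "profile" mode value is an assumption; only "fmt" is shown above.
curl -N \
  -X POST http://localhost:7700/logs \
  -H 'Content-Type: application/json' \
  --data-binary '{
    "mode": "profile",
    "target": "milli=trace"
  }' > indexing-trace.json
```

Once the operation you want to profile has finished, stop the stream with the `DELETE` call shown earlier before converting the file.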

Then go to https://profiler.firefox.com and load the converted profile.
Note that profiles can also be shared using the https://share.firefox.dev website.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Commit 023c2d755f, authored by meili-bors[bot] on 2024-02-08 14:16:56 +00:00 and committed by GitHub.
83 changed files with 2656 additions and 208 deletions

Cargo.lock (generated, 274 changed lines)
View File

@ -535,6 +535,26 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "bindgen"
version = "0.68.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "726e4313eb6ec35d2730258ad4e15b547ee75d6afaa1361a922e78e59b7d8078"
dependencies = [
"bitflags 2.4.1",
"cexpr",
"clang-sys",
"lazy_static",
"lazycell",
"peeking_take_while",
"proc-macro2",
"quote",
"regex",
"rustc-hash",
"shlex",
"syn 2.0.48",
]
[[package]] [[package]]
name = "bit-set" name = "bit-set"
version = "0.5.3" version = "0.5.3"
@ -831,6 +851,15 @@ dependencies = [
"smallvec", "smallvec",
] ]
[[package]]
name = "cexpr"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
dependencies = [
"nom",
]
[[package]] [[package]]
name = "cfg-if" name = "cfg-if"
version = "1.0.0" version = "1.0.0"
@ -915,6 +944,17 @@ dependencies = [
"inout", "inout",
] ]
[[package]]
name = "clang-sys"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
dependencies = [
"glob",
"libc",
"libloading",
]
[[package]] [[package]]
name = "clap" name = "clap"
version = "4.4.17" version = "4.4.17"
@ -961,6 +1001,18 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
[[package]]
name = "color-spantrace"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2"
dependencies = [
"once_cell",
"owo-colors",
"tracing-core",
"tracing-error",
]
[[package]] [[package]]
name = "colorchoice" name = "colorchoice"
version = "1.0.0" version = "1.0.0"
@ -1286,6 +1338,15 @@ dependencies = [
"syn 2.0.48", "syn 2.0.48",
] ]
[[package]]
name = "debugid"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
dependencies = [
"uuid",
]
[[package]] [[package]]
name = "deduplicating_array" name = "deduplicating_array"
version = "0.1.5" version = "0.1.5"
@ -1482,7 +1543,6 @@ dependencies = [
"big_s", "big_s",
"flate2", "flate2",
"http 0.2.11", "http 0.2.11",
"log",
"maplit", "maplit",
"meili-snap", "meili-snap",
"meilisearch-auth", "meilisearch-auth",
@ -1496,6 +1556,7 @@ dependencies = [
"tempfile", "tempfile",
"thiserror", "thiserror",
"time", "time",
"tracing",
"uuid", "uuid",
] ]
@ -1911,6 +1972,19 @@ dependencies = [
"byteorder", "byteorder",
] ]
[[package]]
name = "fxprof-processed-profile"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd"
dependencies = [
"bitflags 2.4.1",
"debugid",
"fxhash",
"serde",
"serde_json",
]
[[package]] [[package]]
name = "gemm" name = "gemm"
version = "0.17.0" version = "0.17.0"
@ -2869,11 +2943,9 @@ dependencies = [
"file-store", "file-store",
"flate2", "flate2",
"insta", "insta",
"log",
"meili-snap", "meili-snap",
"meilisearch-auth", "meilisearch-auth",
"meilisearch-types", "meilisearch-types",
"nelson",
"page_size 0.5.0", "page_size 0.5.0",
"puffin", "puffin",
"roaring", "roaring",
@ -2883,6 +2955,7 @@ dependencies = [
"tempfile", "tempfile",
"thiserror", "thiserror",
"time", "time",
"tracing",
"ureq", "ureq",
"uuid", "uuid",
] ]
@ -3085,6 +3158,12 @@ version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]] [[package]]
name = "levenshtein_automata" name = "levenshtein_automata"
version = "0.2.1" version = "0.2.1"
@ -3112,6 +3191,16 @@ dependencies = [
"pkg-config", "pkg-config",
] ]
[[package]]
name = "libloading"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c571b676ddfc9a8c12f1f3d3085a7b163966a8fd8098a90640953ce5f6170161"
dependencies = [
"cfg-if",
"windows-sys 0.48.0",
]
[[package]] [[package]]
name = "libm" name = "libm"
version = "0.2.7" version = "0.2.7"
@ -3128,6 +3217,17 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "libproc"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "229004ebba9d1d5caf41623f1523b6d52abb47d9f6ab87f7e6fc992e3b854aef"
dependencies = [
"bindgen",
"errno",
"libc",
]
[[package]] [[package]]
name = "libz-sys" name = "libz-sys"
version = "1.1.12" version = "1.1.12"
@ -3586,7 +3686,6 @@ dependencies = [
"deserr", "deserr",
"dump", "dump",
"either", "either",
"env_logger",
"file-store", "file-store",
"flate2", "flate2",
"fst", "fst",
@ -3601,7 +3700,6 @@ dependencies = [
"itertools 0.11.0", "itertools 0.11.0",
"jsonwebtoken", "jsonwebtoken",
"lazy_static", "lazy_static",
"log",
"manifest-dir-macros", "manifest-dir-macros",
"maplit", "maplit",
"meili-snap", "meili-snap",
@ -3644,6 +3742,10 @@ dependencies = [
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"toml", "toml",
"tracing",
"tracing-actix-web",
"tracing-subscriber",
"tracing-trace",
"url", "url",
"urlencoding", "urlencoding",
"uuid", "uuid",
@ -3776,7 +3878,6 @@ dependencies = [
"json-depth-checker", "json-depth-checker",
"levenshtein_automata", "levenshtein_automata",
"liquid", "liquid",
"log",
"logging_timer", "logging_timer",
"maplit", "maplit",
"md5", "md5",
@ -3805,6 +3906,7 @@ dependencies = [
"time", "time",
"tokenizers", "tokenizers",
"tokio", "tokio",
"tracing",
"uuid", "uuid",
] ]
@ -3882,9 +3984,10 @@ dependencies = [
] ]
[[package]] [[package]]
name = "nelson" name = "mutually_exclusive_features"
version = "0.1.0" version = "0.0.3"
source = "git+https://github.com/meilisearch/nelson.git?rev=675f13885548fb415ead8fbb447e9e6d9314000a#675f13885548fb415ead8fbb447e9e6d9314000a" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"
[[package]] [[package]]
name = "nom" name = "nom"
@ -3916,6 +4019,16 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]] [[package]]
name = "num-bigint" name = "num-bigint"
version = "0.4.3" version = "0.4.3"
@ -4037,6 +4150,18 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "owo-colors"
version = "3.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
[[package]] [[package]]
name = "page_size" name = "page_size"
version = "0.5.0" version = "0.5.0"
@ -4124,6 +4249,12 @@ dependencies = [
"sha2", "sha2",
] ]
[[package]]
name = "peeking_take_while"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
[[package]] [[package]]
name = "pem" name = "pem"
version = "1.1.1" version = "1.1.1"
@ -4243,6 +4374,26 @@ dependencies = [
"siphasher 0.3.11", "siphasher 0.3.11",
] ]
[[package]]
name = "pin-project"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0"
dependencies = [
"pin-project-internal",
]
[[package]]
name = "pin-project-internal"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.48",
]
[[package]] [[package]]
name = "pin-project-lite" name = "pin-project-lite"
version = "0.2.13" version = "0.2.13"
@ -4994,6 +5145,21 @@ dependencies = [
"digest", "digest",
] ]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]] [[package]]
name = "signal-hook-registry" name = "signal-hook-registry"
version = "1.4.1" version = "1.4.1"
@ -5326,6 +5492,16 @@ dependencies = [
"syn 2.0.48", "syn 2.0.48",
] ]
[[package]]
name = "thread_local"
version = "1.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]] [[package]]
name = "tiktoken-rs" name = "tiktoken-rs"
version = "0.5.8" version = "0.5.8"
@ -5554,17 +5730,29 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
[[package]] [[package]]
name = "tracing" name = "tracing"
version = "0.1.37" version = "0.1.40"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
dependencies = [ dependencies = [
"cfg-if",
"log", "log",
"pin-project-lite", "pin-project-lite",
"tracing-attributes", "tracing-attributes",
"tracing-core", "tracing-core",
] ]
[[package]]
name = "tracing-actix-web"
version = "0.7.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fe0d5feac3f4ca21ba33496bcb1ccab58cca6412b1405ae80f0581541e0ca78"
dependencies = [
"actix-web",
"mutually_exclusive_features",
"pin-project",
"tracing",
"uuid",
]
[[package]] [[package]]
name = "tracing-attributes" name = "tracing-attributes"
version = "0.1.27" version = "0.1.27"
@ -5578,11 +5766,63 @@ dependencies = [
[[package]] [[package]]
name = "tracing-core" name = "tracing-core"
version = "0.1.31" version = "0.1.32"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
dependencies = [ dependencies = [
"once_cell", "once_cell",
"valuable",
]
[[package]]
name = "tracing-error"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d686ec1c0f384b1277f097b2f279a2ecc11afe8c133c1aabf036a27cb4cd206e"
dependencies = [
"tracing",
"tracing-subscriber",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"nu-ansi-term",
"sharded-slab",
"smallvec",
"thread_local",
"tracing-core",
"tracing-log",
]
[[package]]
name = "tracing-trace"
version = "0.1.0"
dependencies = [
"byte-unit",
"color-spantrace",
"fxprof-processed-profile",
"libproc",
"serde",
"serde_json",
"tokio",
"tracing",
"tracing-error",
"tracing-subscriber",
] ]
[[package]] [[package]]
@ -5758,6 +5998,12 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]] [[package]]
name = "vcpkg" name = "vcpkg"
version = "0.2.15" version = "0.2.15"

View File

@ -16,12 +16,16 @@ members = [
"json-depth-checker", "json-depth-checker",
"benchmarks", "benchmarks",
"fuzzers", "fuzzers",
"tracing-trace",
"xtask", "xtask",
] ]
[workspace.package] [workspace.package]
version = "1.6.1" version = "1.6.1"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"] authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",
]
description = "Meilisearch HTTP server" description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com" homepage = "https://meilisearch.com"
readme = "README.md" readme = "README.md"

View File

@ -14,7 +14,6 @@ license.workspace = true
anyhow = "1.0.79" anyhow = "1.0.79"
flate2 = "1.0.28" flate2 = "1.0.28"
http = "0.2.11" http = "0.2.11"
log = "0.4.20"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
once_cell = "1.19.0" once_cell = "1.19.0"
@ -26,6 +25,7 @@ tar = "0.4.40"
tempfile = "3.9.0" tempfile = "3.9.0"
thiserror = "1.0.56" thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] }
tracing = "0.1.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]

View File

@ -120,7 +120,7 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
criterion.as_ref().map(ToString::to_string) criterion.as_ref().map(ToString::to_string)
} }
Err(()) => { Err(()) => {
log::warn!( tracing::warn!(
"Could not import the following ranking rule: `{}`.", "Could not import the following ranking rule: `{}`.",
ranking_rule ranking_rule
); );
@ -152,11 +152,11 @@ impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
use v2::updates::UpdateStatus as UpdateStatusV2; use v2::updates::UpdateStatus as UpdateStatusV2;
Some(match source { Some(match source {
UpdateStatusV1::Enqueued { content } => { UpdateStatusV1::Enqueued { content } => {
log::warn!( tracing::warn!(
"Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)", "Cannot import task {} (importing enqueued tasks from v1 dumps is unsupported)",
content.update_id content.update_id
); );
log::warn!("Task will be skipped in the queue of imported tasks."); tracing::warn!("Task will be skipped in the queue of imported tasks.");
return None; return None;
} }
@ -229,7 +229,7 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
Some(match source { Some(match source {
v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments, v1::update::UpdateType::ClearAll => v2::updates::UpdateMeta::ClearDocuments,
v1::update::UpdateType::Customs => { v1::update::UpdateType::Customs => {
log::warn!("Ignoring task with type 'Customs' that is no longer supported"); tracing::warn!("Ignoring task with type 'Customs' that is no longer supported");
return None; return None;
} }
v1::update::UpdateType::DocumentsAddition { .. } => { v1::update::UpdateType::DocumentsAddition { .. } => {
@ -296,7 +296,7 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity), v1::settings::RankingRule::Proximity => Some(v2::settings::Criterion::Proximity),
v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute), v1::settings::RankingRule::Attribute => Some(v2::settings::Criterion::Attribute),
v1::settings::RankingRule::WordsPosition => { v1::settings::RankingRule::WordsPosition => {
log::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes"); tracing::warn!("Removing the 'WordsPosition' ranking rule that is no longer supported, please check the resulting ranking rules of your indexes");
None None
} }
v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness), v1::settings::RankingRule::Exactness => Some(v2::settings::Criterion::Exactness),

View File

@ -146,8 +146,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
started_processing_at: processing.started_processing_at, started_processing_at: processing.started_processing_at,
}), }),
Err(e) => { Err(e) => {
log::warn!("Error with task {}: {}", processing.from.update_id, e); tracing::warn!("Error with task {}: {}", processing.from.update_id, e);
log::warn!("Task will be marked as `Failed`."); tracing::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed { v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing { from: v3::updates::Processing {
from: v3::updates::Enqueued { from: v3::updates::Enqueued {
@ -172,8 +172,8 @@ impl From<v2::updates::UpdateStatus> for v3::updates::UpdateStatus {
enqueued_at: enqueued.enqueued_at, enqueued_at: enqueued.enqueued_at,
}), }),
Err(e) => { Err(e) => {
log::warn!("Error with task {}: {}", enqueued.update_id, e); tracing::warn!("Error with task {}: {}", enqueued.update_id, e);
log::warn!("Task will be marked as `Failed`."); tracing::warn!("Task will be marked as `Failed`.");
v3::updates::UpdateStatus::Failed(v3::updates::Failed { v3::updates::UpdateStatus::Failed(v3::updates::Failed {
from: v3::updates::Processing { from: v3::updates::Processing {
from: v3::updates::Enqueued { from: v3::updates::Enqueued {
@ -353,7 +353,7 @@ impl From<String> for v3::Code {
"malformed_payload" => v3::Code::MalformedPayload, "malformed_payload" => v3::Code::MalformedPayload,
"missing_payload" => v3::Code::MissingPayload, "missing_payload" => v3::Code::MissingPayload,
other => { other => {
log::warn!("Unknown error code {}", other); tracing::warn!("Unknown error code {}", other);
v3::Code::UnretrievableErrorCode v3::Code::UnretrievableErrorCode
} }
} }

View File

@ -76,20 +76,20 @@ impl CompatV3ToV4 {
let index_uid = match index_uid { let index_uid = match index_uid {
Some(uid) => uid, Some(uid) => uid,
None => { None => {
log::warn!( tracing::warn!(
"Error while importing the update {}.", "Error while importing the update {}.",
task.update.id() task.update.id()
); );
log::warn!( tracing::warn!(
"The index associated to the uuid `{}` could not be retrieved.", "The index associated to the uuid `{}` could not be retrieved.",
task.uuid.to_string() task.uuid.to_string()
); );
if task.update.is_finished() { if task.update.is_finished() {
// we're fucking with his history but not his data, that's ok-ish. // we're fucking with his history but not his data, that's ok-ish.
log::warn!("The index-uuid will be set as `unknown`."); tracing::warn!("The index-uuid will be set as `unknown`.");
String::from("unknown") String::from("unknown")
} else { } else {
log::warn!("The task will be ignored."); tracing::warn!("The task will be ignored.");
return None; return None;
} }
} }

View File

@ -305,7 +305,7 @@ impl From<v4::ResponseError> for v5::ResponseError {
"invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt, "invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt,
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription, "invalid_api_key_description" => v5::Code::InvalidApiKeyDescription,
other => { other => {
log::warn!("Unknown error code {}", other); tracing::warn!("Unknown error code {}", other);
v5::Code::UnretrievableErrorCode v5::Code::UnretrievableErrorCode
} }
}; };

View File

@ -304,7 +304,7 @@ impl From<v5::ResponseError> for v6::ResponseError {
"immutable_field" => v6::Code::BadRequest, "immutable_field" => v6::Code::BadRequest,
"api_key_already_exists" => v6::Code::ApiKeyAlreadyExists, "api_key_already_exists" => v6::Code::ApiKeyAlreadyExists,
other => { other => {
log::warn!("Unknown error code {}", other); tracing::warn!("Unknown error code {}", other);
v6::Code::UnretrievableErrorCode v6::Code::UnretrievableErrorCode
} }
}; };
@ -329,7 +329,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
new_ranking_rules.push(new_rule); new_ranking_rules.push(new_rule);
} }
Err(_) => { Err(_) => {
log::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.") tracing::warn!("Error while importing settings. The ranking rule `{rule}` does not exist anymore.")
} }
} }
} }

View File

@ -2,10 +2,10 @@ use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind}; use std::io::{BufRead, BufReader, ErrorKind};
use std::path::Path; use std::path::Path;
use log::debug;
pub use meilisearch_types::milli; pub use meilisearch_types::milli;
use tempfile::TempDir; use tempfile::TempDir;
use time::OffsetDateTime; use time::OffsetDateTime;
use tracing::debug;
use uuid::Uuid; use uuid::Uuid;
use super::Document; use super::Document;

View File

@ -19,7 +19,6 @@ dump = { path = "../dump" }
enum-iterator = "1.5.0" enum-iterator = "1.5.0"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.28" flate2 = "1.0.28"
log = "0.4.20"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0" page_size = "0.5.0"
@ -30,7 +29,13 @@ serde_json = { version = "1.0.111", features = ["preserve_order"] }
synchronoise = "1.0.1" synchronoise = "1.0.1"
tempfile = "3.9.0" tempfile = "3.9.0"
thiserror = "1.0.56" thiserror = "1.0.56"
time = { version = "0.3.31", features = ["serde-well-known", "formatting", "parsing", "macros"] } time = { version = "0.3.31", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
tracing = "0.1.40"
ureq = "2.9.1" ureq = "2.9.1"
uuid = { version = "1.6.1", features = ["serde", "v4"] } uuid = { version = "1.6.1", features = ["serde", "v4"] }
@ -39,4 +44,3 @@ big_s = "1.0.2"
crossbeam = "0.8.4" crossbeam = "0.8.4"
insta = { version = "1.34.0", features = ["json", "redactions"] } insta = { version = "1.34.0", features = ["json", "redactions"] }
meili-snap = { path = "../meili-snap" } meili-snap = { path = "../meili-snap" }
nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"}

View File

@ -24,7 +24,6 @@ use std::fs::{self, File};
use std::io::BufWriter; use std::io::BufWriter;
use dump::IndexMetadata; use dump::IndexMetadata;
use log::{debug, error, info, trace};
use meilisearch_types::error::Code; use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@ -514,6 +513,7 @@ impl IndexScheduler {
/// 3. We get the *next* snapshot to process. /// 3. We get the *next* snapshot to process.
/// 4. We get the *next* dump to process. /// 4. We get the *next* dump to process.
/// 5. We get the *next* tasks to process for a specific index. /// 5. We get the *next* tasks to process for a specific index.
#[tracing::instrument(level = "trace", skip(self, rtxn), target = "indexing::scheduler")]
pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> { pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result<Option<Batch>> {
#[cfg(test)] #[cfg(test)]
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
@ -619,6 +619,7 @@ impl IndexScheduler {
/// The list of tasks that were processed. The metadata of each task in the returned /// The list of tasks that were processed. The metadata of each task in the returned
/// list is updated accordingly, with the exception of the its date fields /// list is updated accordingly, with the exception of the its date fields
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> { pub(crate) fn process_batch(&self, batch: Batch) -> Result<Vec<Task>> {
#[cfg(test)] #[cfg(test)]
{ {
@ -668,9 +669,10 @@ impl IndexScheduler {
Ok(()) => { Ok(()) => {
for content_uuid in canceled_tasks_content_uuids { for content_uuid in canceled_tasks_content_uuids {
if let Err(error) = self.delete_update_file(content_uuid) { if let Err(error) = self.delete_update_file(content_uuid) {
error!( tracing::error!(
"We failed deleting the content file indentified as {}: {}", file_content_uuid = %content_uuid,
content_uuid, error %error,
"Failed deleting content file"
) )
} }
} }
@ -969,7 +971,10 @@ impl IndexScheduler {
match res { match res {
Ok(_) => (), Ok(_) => (),
Err(e) => error!("Could not write the stats of the index {}", e), Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
} }
Ok(tasks) Ok(tasks)
@ -997,7 +1002,7 @@ impl IndexScheduler {
builder.set_primary_key(primary_key); builder.set_primary_key(primary_key);
let must_stop_processing = self.must_stop_processing.clone(); let must_stop_processing = self.must_stop_processing.clone();
builder.execute( builder.execute(
|indexing_step| debug!("update: {:?}", indexing_step), |indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(), || must_stop_processing.get(),
)?; )?;
index_wtxn.commit()?; index_wtxn.commit()?;
@ -1024,7 +1029,10 @@ impl IndexScheduler {
match res { match res {
Ok(_) => (), Ok(_) => (),
Err(e) => error!("Could not write the stats of the index {}", e), Err(e) => tracing::error!(
error = &e as &dyn std::error::Error,
"Could not write the stats of the index"
),
} }
Ok(vec![task]) Ok(vec![task])
@ -1143,6 +1151,11 @@ impl IndexScheduler {
/// ///
/// ## Return /// ## Return
/// The list of processed tasks. /// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index),
target = "indexing::scheduler"
)]
fn apply_index_operation<'i>( fn apply_index_operation<'i>(
&self, &self,
index_wtxn: &mut RwTxn<'i>, index_wtxn: &mut RwTxn<'i>,
@ -1203,7 +1216,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config); milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.set_primary_key(primary_key); builder.set_primary_key(primary_key);
builder.execute( builder.execute(
|indexing_step| debug!("update: {:?}", indexing_step), |indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.clone().get(), || must_stop_processing.clone().get(),
)?; )?;
primary_key_has_been_set = true; primary_key_has_been_set = true;
@ -1222,7 +1235,7 @@ impl IndexScheduler {
index, index,
indexer_config, indexer_config,
config, config,
|indexing_step| trace!("update: {:?}", indexing_step), |indexing_step| tracing::trace!(?indexing_step, "Update"),
|| must_stop_processing.get(), || must_stop_processing.get(),
)?; )?;
@ -1294,7 +1307,7 @@ impl IndexScheduler {
if !tasks.iter().all(|res| res.error.is_some()) { if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?; let addition = builder.execute()?;
info!("document addition done: {:?}", addition); tracing::info!(indexing_result = ?addition, "document indexing done");
} else if primary_key_has_been_set { } else if primary_key_has_been_set {
// Everything failed but we've set a primary key. // Everything failed but we've set a primary key.
// We need to remove it. // We need to remove it.
@ -1302,7 +1315,7 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config); milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key(); builder.reset_primary_key();
builder.execute( builder.execute(
|indexing_step| trace!("update: {:?}", indexing_step), |indexing_step| tracing::trace!(update = ?indexing_step),
|| must_stop_processing.clone().get(), || must_stop_processing.clone().get(),
)?; )?;
} }
@ -1372,7 +1385,7 @@ impl IndexScheduler {
let must_stop_processing = self.must_stop_processing.clone(); let must_stop_processing = self.must_stop_processing.clone();
builder.execute( builder.execute(
|indexing_step| debug!("update: {:?}", indexing_step), |indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(), || must_stop_processing.get(),
)?; )?;
@ -1584,7 +1597,7 @@ fn delete_document_by_filter<'a>(
index, index,
indexer_config, indexer_config,
config, config,
|indexing_step| debug!("update: {:?}", indexing_step), |indexing_step| tracing::debug!(update = ?indexing_step),
|| must_stop_processing.get(), || must_stop_processing.get(),
)?; )?;

View File

@ -43,6 +43,19 @@ impl RoFeatures {
} }
} }
pub fn check_logs_route(&self) -> Result<()> {
if self.runtime.logs_route {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "getting logs through the `/logs/stream` route",
feature: "logs route",
issue_link: "https://github.com/orgs/meilisearch/discussions/721",
}
.into())
}
}
pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> { pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
if self.runtime.vector_store { if self.runtime.vector_store {
Ok(()) Ok(())
@ -81,6 +94,7 @@ impl FeatureData {
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default(); runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics, metrics: instance_features.metrics || persisted_features.metrics,
logs_route: instance_features.logs_route || persisted_features.logs_route,
..persisted_features ..persisted_features
})); }));

View File

@ -3,13 +3,13 @@ use std::sync::{Arc, RwLock};
use std::time::Duration; use std::time::Duration;
use std::{fs, thread}; use std::{fs, thread};
use log::error;
use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index}; use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use time::OffsetDateTime; use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid; use uuid::Uuid;
use self::index_map::IndexMap; use self::index_map::IndexMap;

View File

@ -535,17 +535,17 @@ impl IndexScheduler {
let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) { let budget = if Self::is_good_heed(tasks_path, DEFAULT_BUDGET) {
DEFAULT_BUDGET DEFAULT_BUDGET
} else { } else {
log::debug!("determining budget with dichotomic search"); tracing::debug!("determining budget with dichotomic search");
utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| { utils::dichotomic_search(DEFAULT_BUDGET / 2, |map_size| {
Self::is_good_heed(tasks_path, map_size) Self::is_good_heed(tasks_path, map_size)
}) })
}; };
log::debug!("memmap budget: {budget}B"); tracing::debug!("memmap budget: {budget}B");
let mut budget = budget / 2; let mut budget = budget / 2;
if task_db_size > (budget / 2) { if task_db_size > (budget / 2) {
task_db_size = clamp_to_page_size(budget * 2 / 5); task_db_size = clamp_to_page_size(budget * 2 / 5);
log::debug!( tracing::debug!(
"Decreasing max size of task DB to {task_db_size}B due to constrained memory space" "Decreasing max size of task DB to {task_db_size}B due to constrained memory space"
); );
} }
@ -555,13 +555,13 @@ impl IndexScheduler {
let budget = budget; let budget = budget;
let task_db_size = task_db_size; let task_db_size = task_db_size;
log::debug!("index budget: {budget}B"); tracing::debug!("index budget: {budget}B");
let mut index_count = budget / base_map_size; let mut index_count = budget / base_map_size;
if index_count < 2 { if index_count < 2 {
// take a bit less than half than the budget to make sure we can always afford to open an index // take a bit less than half than the budget to make sure we can always afford to open an index
let map_size = (budget * 2) / 5; let map_size = (budget * 2) / 5;
// single index of max budget // single index of max budget
log::debug!("1 index of {map_size}B can be opened simultaneously."); tracing::debug!("1 index of {map_size}B can be opened simultaneously.");
return IndexBudget { map_size, index_count: 1, task_db_size }; return IndexBudget { map_size, index_count: 1, task_db_size };
} }
// give us some space for an additional index when the cache is already full // give us some space for an additional index when the cache is already full
@ -570,7 +570,7 @@ impl IndexScheduler {
if index_count > max_index_count { if index_count > max_index_count {
index_count = max_index_count; index_count = max_index_count;
} }
log::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously."); tracing::debug!("Up to {index_count} indexes of {base_map_size}B opened simultaneously.");
IndexBudget { map_size: base_map_size, index_count, task_db_size } IndexBudget { map_size: base_map_size, index_count, task_db_size }
} }
@ -617,7 +617,7 @@ impl IndexScheduler {
Ok(TickOutcome::TickAgain(_)) => (), Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => { Err(e) => {
log::error!("{e}"); tracing::error!("{e}");
// Wait one second when an irrecoverable error occurs. // Wait one second when an irrecoverable error occurs.
if !e.is_recoverable() { if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1)); std::thread::sleep(Duration::from_secs(1));
@ -634,15 +634,15 @@ impl IndexScheduler {
let mut file = match File::create(format!("{}.puffin", now)) { let mut file = match File::create(format!("{}.puffin", now)) {
Ok(file) => file, Ok(file) => file,
Err(e) => { Err(e) => {
log::error!("{e}"); tracing::error!("{e}");
continue; continue;
} }
}; };
if let Err(e) = frame_view.save_to_writer(&mut file) { if let Err(e) = frame_view.save_to_writer(&mut file) {
log::error!("{e}"); tracing::error!("{e}");
} }
if let Err(e) = file.sync_all() { if let Err(e) = file.sync_all() {
log::error!("{e}"); tracing::error!("{e}");
} }
// We erase this frame view as it is no more useful. We want to // We erase this frame view as it is no more useful. We want to
// measure the new frames now that we exported the previous ones. // measure the new frames now that we exported the previous ones.
@ -1190,10 +1190,10 @@ impl IndexScheduler {
self.update_task(&mut wtxn, &task) self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
if let Err(e) = self.delete_persisted_task_data(&task) { if let Err(e) = self.delete_persisted_task_data(&task) {
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
} }
} }
log::info!("A batch of tasks was successfully completed."); tracing::info!("A batch of tasks was successfully completed.");
} }
// If we have an abortion error we must stop the tick here and re-schedule tasks. // If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError( Err(Error::Milli(milli::Error::InternalError(
@ -1247,7 +1247,7 @@ impl IndexScheduler {
self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;
if let Err(e) = self.delete_persisted_task_data(&task) { if let Err(e) = self.delete_persisted_task_data(&task) {
log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); tracing::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid);
} }
self.update_task(&mut wtxn, &task) self.update_task(&mut wtxn, &task)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
@ -1341,7 +1341,7 @@ impl IndexScheduler {
}; };
if let Err(e) = request.send(reader) { if let Err(e) = request.send(reader) {
log::error!("While sending data to the webhook: {e}"); tracing::error!("While sending data to the webhook: {e}");
} }
} }
@ -1367,12 +1367,12 @@ impl IndexScheduler {
// /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete // /!\ the len must be at least 2 or else we might enter an infinite loop where we only delete
// the deletion tasks we enqueued ourselves. // the deletion tasks we enqueued ourselves.
if to_delete.len() < 2 { if to_delete.len() < 2 {
log::warn!("The task queue is almost full, but no task can be deleted yet."); tracing::warn!("The task queue is almost full, but no task can be deleted yet.");
// the only thing we can do is hope that the user tasks are going to finish // the only thing we can do is hope that the user tasks are going to finish
return Ok(()); return Ok(());
} }
log::info!( tracing::info!(
"The task queue is almost full. Deleting the oldest {} finished tasks.", "The task queue is almost full. Deleting the oldest {} finished tasks.",
to_delete.len() to_delete.len()
); );

View File

@ -310,6 +310,8 @@ TooManyVectors , InvalidRequest , BAD_REQUEST ;
UnretrievableDocument , Internal , BAD_REQUEST ; UnretrievableDocument , Internal , BAD_REQUEST ;
UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ; UnretrievableErrorCode , InvalidRequest , BAD_REQUEST ;
UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; UnsupportedMediaType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
// Experimental features
VectorEmbeddingError , InvalidRequest , BAD_REQUEST VectorEmbeddingError , InvalidRequest , BAD_REQUEST
} }

View File

@ -5,10 +5,12 @@ use serde::{Deserialize, Serialize};
pub struct RuntimeTogglableFeatures { pub struct RuntimeTogglableFeatures {
pub vector_store: bool, pub vector_store: bool,
pub metrics: bool, pub metrics: bool,
pub logs_route: bool,
pub export_puffin_reports: bool, pub export_puffin_reports: bool,
} }
#[derive(Default, Debug, Clone, Copy)] #[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures { pub struct InstanceTogglableFeatures {
pub metrics: bool, pub metrics: bool,
pub logs_route: bool,
} }

View File

@ -42,7 +42,6 @@ crossbeam-channel = "0.5.11"
deserr = { version = "0.6.1", features = ["actix-web"] } deserr = { version = "0.6.1", features = ["actix-web"] }
dump = { path = "../dump" } dump = { path = "../dump" }
either = "1.9.0" either = "1.9.0"
env_logger = "0.10.1"
file-store = { path = "../file-store" } file-store = { path = "../file-store" }
flate2 = "1.0.28" flate2 = "1.0.28"
fst = "0.4.7" fst = "0.4.7"
@ -55,7 +54,6 @@ is-terminal = "0.4.10"
itertools = "0.11.0" itertools = "0.11.0"
jsonwebtoken = "8.3.0" jsonwebtoken = "8.3.0"
lazy_static = "1.4.0" lazy_static = "1.4.0"
log = "0.4.20"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.39", default-features = false } mimalloc = { version = "0.1.39", default-features = false }
@ -105,6 +103,10 @@ yaup = "0.2.1"
serde_urlencoded = "0.7.1" serde_urlencoded = "0.7.1"
termcolor = "1.4.1" termcolor = "1.4.1"
url = { version = "2.5.0", features = ["serde"] } url = { version = "2.5.0", features = ["serde"] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
[dev-dependencies] [dev-dependencies]
actix-rt = "2.9.0" actix-rt = "2.9.0"

View File

@ -250,6 +250,7 @@ impl super::Analytics for SegmentAnalytics {
struct Infos { struct Infos {
env: String, env: String,
experimental_enable_metrics: bool, experimental_enable_metrics: bool,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool, experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize, experimental_max_number_of_batched_tasks: usize,
db_path: bool, db_path: bool,
@ -287,6 +288,7 @@ impl From<Opt> for Infos {
let Opt { let Opt {
db_path, db_path,
experimental_enable_metrics, experimental_enable_metrics,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks, experimental_max_number_of_batched_tasks,
http_addr, http_addr,
@ -333,6 +335,7 @@ impl From<Opt> for Infos {
Self { Self {
env, env,
experimental_enable_metrics, experimental_enable_metrics,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
db_path: db_path != PathBuf::from("./data.ms"), db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(), import_dump: import_dump.is_some(),

View File

@ -12,6 +12,8 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}", #[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))] .0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>), MissingContentType(Vec<String>),
#[error("The `/logs/stream` route is currently in use by someone else.")]
AlreadyUsedLogRoute,
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")] #[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String), CsvDelimiterWithWrongContentType(String),
#[error( #[error(
@ -59,6 +61,7 @@ impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code { fn error_code(&self) -> Code {
match self { match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::AlreadyUsedLogRoute => Code::BadRequest,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType, MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,

View File

@ -29,7 +29,6 @@ use error::PayloadError;
use extractors::payload::PayloadConfig; use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE; use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error;
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@ -39,6 +38,8 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt; pub use option::Opt;
use option::ScheduleSnapshot; use option::ScheduleSnapshot;
use tracing::{error, info_span};
use tracing_subscriber::filter::Targets;
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
@ -86,10 +87,21 @@ fn is_empty_db(db_path: impl AsRef<Path>) -> bool {
} }
} }
/// The handle used to update the logs at runtime. Must be accessible from the `main.rs` and the `route/logs.rs`.
pub type LogRouteHandle =
tracing_subscriber::reload::Handle<LogRouteType, tracing_subscriber::Registry>;
pub type LogRouteType = tracing_subscriber::filter::Filtered<
Option<Box<dyn tracing_subscriber::Layer<tracing_subscriber::Registry> + Send + Sync>>,
Targets,
tracing_subscriber::Registry,
>;
pub fn create_app( pub fn create_app(
index_scheduler: Data<IndexScheduler>, index_scheduler: Data<IndexScheduler>,
auth_controller: Data<AuthController>, auth_controller: Data<AuthController>,
opt: Opt, opt: Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>, analytics: Arc<dyn Analytics>,
enable_dashboard: bool, enable_dashboard: bool,
) -> actix_web::App< ) -> actix_web::App<
@ -108,6 +120,7 @@ pub fn create_app(
index_scheduler.clone(), index_scheduler.clone(),
auth_controller.clone(), auth_controller.clone(),
&opt, &opt,
logs,
analytics.clone(), analytics.clone(),
) )
}) })
@ -123,11 +136,49 @@ pub fn create_app(
.allow_any_method() .allow_any_method()
.max_age(86_400), // 24h .max_age(86_400), // 24h
) )
.wrap(actix_web::middleware::Logger::default()) .wrap(tracing_actix_web::TracingLogger::<AwebTracingLogger>::new())
.wrap(actix_web::middleware::Compress::default()) .wrap(actix_web::middleware::Compress::default())
.wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim)) .wrap(actix_web::middleware::NormalizePath::new(actix_web::middleware::TrailingSlash::Trim))
} }
struct AwebTracingLogger;
impl tracing_actix_web::RootSpanBuilder for AwebTracingLogger {
fn on_request_start(request: &actix_web::dev::ServiceRequest) -> tracing::Span {
use tracing::field::Empty;
let conn_info = request.connection_info();
let headers = request.headers();
let user_agent = headers
.get(http::header::USER_AGENT)
.map(|value| String::from_utf8_lossy(value.as_bytes()).into_owned())
.unwrap_or_default();
info_span!("HTTP request", method = %request.method(), host = conn_info.host(), route = %request.path(), query_parameters = %request.query_string(), %user_agent, status_code = Empty, error = Empty)
}
fn on_request_end<B: MessageBody>(
span: tracing::Span,
outcome: &Result<ServiceResponse<B>, actix_web::Error>,
) {
match &outcome {
Ok(response) => {
let code: i32 = response.response().status().as_u16().into();
span.record("status_code", code);
if let Some(error) = response.response().error() {
// use the status code already constructed for the outgoing HTTP response
span.record("error", &tracing::field::display(error.as_response_error()));
}
}
Err(error) => {
let code: i32 = error.error_response().status().as_u16().into();
span.record("status_code", code);
span.record("error", &tracing::field::display(error.as_response_error()));
}
};
}
}
enum OnFailure { enum OnFailure {
RemoveDb, RemoveDb,
KeepDb, KeepDb,
@ -280,15 +331,15 @@ fn import_dump(
let mut dump_reader = dump::DumpReader::open(reader)?; let mut dump_reader = dump::DumpReader::open(reader)?;
if let Some(date) = dump_reader.date() { if let Some(date) = dump_reader.date() {
log::info!( tracing::info!(
"Importing a dump of meilisearch `{:?}` from the {}", version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version %date,
date "Importing a dump of meilisearch"
); );
} else { } else {
log::info!( tracing::info!(
"Importing a dump of meilisearch `{:?}`", version = ?dump_reader.version(), // TODO: get the meilisearch version instead of the dump version
dump_reader.version(), // TODO: get the meilisearch version instead of the dump version "Importing a dump of meilisearch",
); );
} }
@ -322,7 +373,7 @@ fn import_dump(
for index_reader in dump_reader.indexes()? { for index_reader in dump_reader.indexes()? {
let mut index_reader = index_reader?; let mut index_reader = index_reader?;
let metadata = index_reader.metadata(); let metadata = index_reader.metadata();
log::info!("Importing index `{}`.", metadata.uid); tracing::info!("Importing index `{}`.", metadata.uid);
let date = Some((metadata.created_at, metadata.updated_at)); let date = Some((metadata.created_at, metadata.updated_at));
let index = index_scheduler.create_raw_index(&metadata.uid, date)?; let index = index_scheduler.create_raw_index(&metadata.uid, date)?;
@ -336,14 +387,15 @@ fn import_dump(
} }
// 4.2 Import the settings. // 4.2 Import the settings.
log::info!("Importing the settings."); tracing::info!("Importing the settings.");
let settings = index_reader.settings()?; let settings = index_reader.settings()?;
apply_settings_to_builder(&settings, &mut builder); apply_settings_to_builder(&settings, &mut builder);
builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?; builder
.execute(|indexing_step| tracing::debug!("update: {:?}", indexing_step), || false)?;
// 4.3 Import the documents. // 4.3 Import the documents.
// 4.3.1 We need to recreate the grenad+obkv format accepted by the index. // 4.3.1 We need to recreate the grenad+obkv format accepted by the index.
log::info!("Importing the documents."); tracing::info!("Importing the documents.");
let file = tempfile::tempfile()?; let file = tempfile::tempfile()?;
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file));
for document in index_reader.documents()? { for document in index_reader.documents()? {
@ -365,15 +417,16 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments, update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default() ..Default::default()
}, },
|indexing_step| log::trace!("update: {:?}", indexing_step), |indexing_step| tracing::trace!("update: {:?}", indexing_step),
|| false, || false,
)?; )?;
let (builder, user_result) = builder.add_documents(reader)?; let (builder, user_result) = builder.add_documents(reader)?;
log::info!("{} documents found.", user_result?); let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
builder.execute()?; builder.execute()?;
wtxn.commit()?; wtxn.commit()?;
log::info!("All documents successfully imported."); tracing::info!("All documents successfully imported.");
} }
let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
@ -391,6 +444,7 @@ pub fn configure_data(
index_scheduler: Data<IndexScheduler>, index_scheduler: Data<IndexScheduler>,
auth: Data<AuthController>, auth: Data<AuthController>,
opt: &Opt, opt: &Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>, analytics: Arc<dyn Analytics>,
) { ) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
@ -398,6 +452,7 @@ pub fn configure_data(
.app_data(index_scheduler) .app_data(index_scheduler)
.app_data(auth) .app_data(auth)
.app_data(web::Data::from(analytics)) .app_data(web::Data::from(analytics))
.app_data(web::Data::new(logs))
.app_data( .app_data(
web::JsonConfig::default() web::JsonConfig::default()
.limit(http_payload_size_limit) .limit(http_payload_size_limit)

View File

@ -1,6 +1,7 @@
use std::env; use std::env;
use std::io::{stderr, Write}; use std::io::{stderr, Write};
use std::path::PathBuf; use std::path::PathBuf;
use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use actix_web::http::KeepAlive; use actix_web::http::KeepAlive;
@ -9,37 +10,60 @@ use actix_web::HttpServer;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use is_terminal::IsTerminal; use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics; use meilisearch::analytics::Analytics;
use meilisearch::{analytics, create_app, prototype_name, setup_meilisearch, Opt}; use meilisearch::{
analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType, Opt,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::Layer;
#[global_allocator] #[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; static ALLOC: MiMalloc = MiMalloc;
fn default_layer() -> LogRouteType {
None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF))
}
/// does all the setup before meilisearch is launched /// does all the setup before meilisearch is launched
fn setup(opt: &Opt) -> anyhow::Result<()> { fn setup(opt: &Opt) -> anyhow::Result<LogRouteHandle> {
let mut log_builder = env_logger::Builder::new(); let (route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(default_layer());
let log_filters = format!( let route_layer: tracing_subscriber::reload::Layer<_, _> = route_layer;
"{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
opt.log_level let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE)
.with_filter(
tracing_subscriber::filter::LevelFilter::from_str(&opt.log_level.to_string())
.unwrap(),
),
); );
log_builder.parse_filters(&log_filters);
log_builder.init(); // set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
Ok(()) Ok(route_layer_handle)
}
fn on_panic(info: &std::panic::PanicInfo) {
let info = info.to_string().replace('\n', " ");
tracing::error!(%info);
} }
#[actix_web::main] #[actix_web::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?; let (opt, config_read_from) = Opt::try_build()?;
std::panic::set_hook(Box::new(on_panic));
anyhow::ensure!( anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage), !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows" "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
); );
setup(&opt)?; let log_handle = setup(&opt)?;
match (opt.env.as_ref(), &opt.master_key) { match (opt.env.as_ref(), &opt.master_key) {
("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => { ("production", Some(master_key)) if master_key.len() < MASTER_KEY_MIN_SIZE => {
@ -77,7 +101,7 @@ async fn main() -> anyhow::Result<()> {
print_launch_resume(&opt, analytics.clone(), config_read_from); print_launch_resume(&opt, analytics.clone(), config_read_from);
run_http(index_scheduler, auth_controller, opt, analytics).await?; run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?;
Ok(()) Ok(())
} }
@ -86,6 +110,7 @@ async fn run_http(
index_scheduler: Arc<IndexScheduler>, index_scheduler: Arc<IndexScheduler>,
auth_controller: Arc<AuthController>, auth_controller: Arc<AuthController>,
opt: Opt, opt: Opt,
logs: LogRouteHandle,
analytics: Arc<dyn Analytics>, analytics: Arc<dyn Analytics>,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
let enable_dashboard = &opt.env == "development"; let enable_dashboard = &opt.env == "development";
@ -98,6 +123,7 @@ async fn run_http(
index_scheduler.clone(), index_scheduler.clone(),
auth_controller.clone(), auth_controller.clone(),
opt.clone(), opt.clone(),
logs.clone(),
analytics.clone(), analytics.clone(),
enable_dashboard, enable_dashboard,
) )
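For readers unfamiliar with `tracing_subscriber::reload`, here is a minimal sketch of the pattern `setup()` relies on. It is not Meilisearch's exact types, and it assumes only the `tracing` and `tracing-subscriber` crates with their default `registry`/`fmt` features. The reload layer wraps an `Option` of a boxed layer, and the handle it returns is what the `/logs/stream` handlers later use to install or remove a layer at runtime:

```rust
use tracing_subscriber::filter::{LevelFilter, Targets};
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::{reload, Layer, Registry};

fn main() {
    // Start with no inner layer: the route contributes nothing until a client asks for logs.
    let inner: Option<Box<dyn Layer<Registry> + Send + Sync>> = None;
    let (route_layer, handle) =
        reload::Layer::new(inner.with_filter(Targets::new().with_target("", LevelFilter::OFF)));

    // Register the reloadable layer once, globally (the real `setup()` adds an fmt layer next to it).
    let subscriber = tracing_subscriber::registry().with(route_layer);
    tracing::subscriber::set_global_default(subscriber).unwrap();

    // Later, e.g. in the `POST /logs/stream` handler, the handle swaps a real layer in.
    handle
        .modify(|layer| {
            *layer.filter_mut() = Targets::new().with_target("milli", LevelFilter::TRACE);
            *layer.inner_mut() = Some(Box::new(tracing_subscriber::fmt::layer()));
        })
        .unwrap();

    tracing::trace!(target: "milli", "this event now reaches the reloadable layer");
}
```

`DELETE /logs/stream` is the mirror image: it calls `modify` again and sets the inner layer back to `None`.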


@ -51,6 +51,7 @@ const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP";
const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
@ -309,6 +310,13 @@ pub struct Opt {
#[serde(default)] #[serde(default)]
pub experimental_enable_metrics: bool, pub experimental_enable_metrics: bool,
/// Experimental logs route feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/721>
///
/// Enables the logs route on the `POST /logs/stream` endpoint and on `DELETE /logs/stream` to stop receiving logs.
#[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)]
#[serde(default)]
pub experimental_enable_logs_route: bool,
/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652> /// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)] #[serde(default)]
@ -414,6 +422,7 @@ impl Opt {
#[cfg(feature = "analytics")] #[cfg(feature = "analytics")]
no_analytics, no_analytics,
experimental_enable_metrics, experimental_enable_metrics,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage, experimental_reduce_indexing_memory_usage,
} = self; } = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_DB_PATH, db_path);
@ -470,6 +479,10 @@ impl Opt {
MEILI_EXPERIMENTAL_ENABLE_METRICS, MEILI_EXPERIMENTAL_ENABLE_METRICS,
experimental_enable_metrics.to_string(), experimental_enable_metrics.to_string(),
); );
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
experimental_enable_logs_route.to_string(),
);
export_to_env_if_not_present( export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
experimental_reduce_indexing_memory_usage.to_string(), experimental_reduce_indexing_memory_usage.to_string(),
@ -524,7 +537,10 @@ impl Opt {
} }
pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures { pub(crate) fn to_instance_features(&self) -> InstanceTogglableFeatures {
InstanceTogglableFeatures { metrics: self.experimental_enable_metrics } InstanceTogglableFeatures {
metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route,
}
} }
} }
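For completeness, a small illustrative sketch of how such a flag is typically wired with `clap` and `serde` (the `DemoOpt` struct below is hypothetical, not the real `Opt`; it assumes `clap` with the `derive` and `env` features and `serde` with `derive`):

```rust
use clap::Parser;
use serde::Deserialize;

const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";

/// Hypothetical subset of `Opt`, for illustration only.
#[derive(Debug, Parser, Deserialize)]
struct DemoOpt {
    /// Enables the experimental `/logs/stream` route.
    #[clap(long, env = MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE)]
    #[serde(default)]
    experimental_enable_logs_route: bool,
}

fn main() {
    // Either `--experimental-enable-logs-route` on the command line or the
    // MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE environment variable turns the flag on;
    // `#[serde(default)]` keeps it optional in a config file.
    let opt = DemoOpt::parse();
    println!("logs route enabled: {}", opt.experimental_enable_logs_route);
}
```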


@ -1,11 +1,11 @@
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use serde_json::json; use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@ -32,6 +32,6 @@ pub async fn create_dump(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Create dump");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
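The same migration repeats across the routes below: `log::debug!("returns: {:?}", x)` becomes a `tracing` event that carries the payload as a structured field and keeps a short, constant message. A minimal sketch of the difference, assuming only the `tracing` and `tracing-subscriber` crates:

```rust
use tracing::Level;

fn main() {
    // Print everything down to DEBUG so both events below are visible.
    tracing_subscriber::fmt().with_max_level(Level::DEBUG).init();

    let task = ("taskUid", 1);

    // Old style: the payload is interpolated into the message string.
    tracing::debug!("returns: {:?}", task);

    // New style: `?task` records the value with its `Debug` representation as a
    // structured `returns` field, and the message stays a constant label.
    tracing::debug!(returns = ?task, "Create dump");
}
```

The `fmt` output looks much the same, but layers that consume events programmatically can read `returns` as a field instead of parsing it back out of the message.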


@ -3,11 +3,11 @@ use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson; use deserr::actix_web::AwebJson;
use deserr::Deserr; use deserr::Deserr;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions; use meilisearch_types::keys::actions;
use serde_json::json; use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::policies::ActionPolicy;
@ -33,8 +33,9 @@ async fn get_features(
let features = index_scheduler.features(); let features = index_scheduler.features();
analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req)); analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
debug!("returns: {:?}", features.runtime_features()); let features = features.runtime_features();
HttpResponse::Ok().json(features.runtime_features()) debug!(returns = ?features, "Get features");
HttpResponse::Ok().json(features)
} }
#[derive(Debug, Deserr)] #[derive(Debug, Deserr)]
@ -45,6 +46,8 @@ pub struct RuntimeTogglableFeatures {
#[deserr(default)] #[deserr(default)]
pub metrics: Option<bool>, pub metrics: Option<bool>,
#[deserr(default)] #[deserr(default)]
pub logs_route: Option<bool>,
#[deserr(default)]
pub export_puffin_reports: Option<bool>, pub export_puffin_reports: Option<bool>,
} }
@ -58,11 +61,13 @@ async fn patch_features(
analytics: Data<dyn Analytics>, analytics: Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let features = index_scheduler.features(); let features = index_scheduler.features();
debug!(parameters = ?new_features, "Patch features");
let old_features = features.runtime_features(); let old_features = features.runtime_features();
let new_features = meilisearch_types::features::RuntimeTogglableFeatures { let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
metrics: new_features.0.metrics.unwrap_or(old_features.metrics), metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route),
export_puffin_reports: new_features export_puffin_reports: new_features
.0 .0
.export_puffin_reports .export_puffin_reports
@ -75,6 +80,7 @@ async fn patch_features(
let meilisearch_types::features::RuntimeTogglableFeatures { let meilisearch_types::features::RuntimeTogglableFeatures {
vector_store, vector_store,
metrics, metrics,
logs_route,
export_puffin_reports, export_puffin_reports,
} = new_features; } = new_features;
@ -83,10 +89,12 @@ async fn patch_features(
json!({ json!({
"vector_store": vector_store, "vector_store": vector_store,
"metrics": metrics, "metrics": metrics,
"logs_route": logs_route,
"export_puffin_reports": export_puffin_reports, "export_puffin_reports": export_puffin_reports,
}), }),
Some(&req), Some(&req),
); );
index_scheduler.put_runtime_features(new_features)?; index_scheduler.put_runtime_features(new_features)?;
debug!(returns = ?new_features, "Patch features");
Ok(HttpResponse::Ok().json(new_features)) Ok(HttpResponse::Ok().json(new_features))
} }


@ -8,7 +8,6 @@ use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr; use deserr::Deserr;
use futures::StreamExt; use futures::StreamExt;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@ -28,6 +27,7 @@ use serde_json::Value;
use tempfile::tempfile; use tempfile::tempfile;
use tokio::fs::File; use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
use tracing::debug;
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
@ -101,6 +101,7 @@ pub async fn get_document(
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner(); let DocumentParam { index_uid, document_id } = document_param.into_inner();
debug!(parameters = ?params, "Get document");
let index_uid = IndexUid::try_from(index_uid)?; let index_uid = IndexUid::try_from(index_uid)?;
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req); analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
@ -110,7 +111,7 @@ pub async fn get_document(
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?; let document = retrieve_document(&index, &document_id, attributes_to_retrieve)?;
debug!("returns: {:?}", document); debug!(returns = ?document, "Get document");
Ok(HttpResponse::Ok().json(document)) Ok(HttpResponse::Ok().json(document))
} }
@ -131,7 +132,7 @@ pub async fn delete_document(
}; };
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Delete document");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -168,9 +169,8 @@ pub async fn documents_by_query_post(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!("called with body: {:?}", body);
let body = body.into_inner(); let body = body.into_inner();
debug!(parameters = ?body, "Get documents POST");
analytics.post_fetch_documents( analytics.post_fetch_documents(
&DocumentFetchKind::Normal { &DocumentFetchKind::Normal {
@ -191,7 +191,7 @@ pub async fn get_documents(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params); debug!(parameters = ?params, "Get documents GET");
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner(); let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
@ -235,7 +235,7 @@ fn documents_by_query(
let ret = PaginationView::new(offset, limit, total as usize, documents); let ret = PaginationView::new(offset, limit, total as usize, documents);
debug!("returns: {:?}", ret); debug!(returns = ?ret, "Get documents");
Ok(HttpResponse::Ok().json(ret)) Ok(HttpResponse::Ok().json(ret))
} }
@ -271,7 +271,7 @@ pub async fn replace_documents(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params); debug!(parameters = ?params, "Replace documents");
let params = params.into_inner(); let params = params.into_inner();
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req); analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@ -288,6 +288,7 @@ pub async fn replace_documents(
allow_index_creation, allow_index_creation,
) )
.await?; .await?;
debug!(returns = ?task, "Replace documents");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -302,8 +303,8 @@ pub async fn update_documents(
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner(); let params = params.into_inner();
debug!(parameters = ?params, "Update documents");
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req); analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@ -319,6 +320,7 @@ pub async fn update_documents(
allow_index_creation, allow_index_creation,
) )
.await?; .await?;
debug!(returns = ?task, "Update documents");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -427,7 +429,10 @@ async fn document_addition(
Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e))) Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
if e.kind() == ErrorKind::NotFound => {} if e.kind() == ErrorKind::NotFound => {}
Err(e) => { Err(e) => {
log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}"); tracing::warn!(
index_uuid = %uuid,
"Unknown error happened while deleting a malformed update file: {e}"
);
} }
} }
// We still want to return the original error to the end user. // We still want to return the original error to the end user.
@ -453,7 +458,6 @@ async fn document_addition(
} }
}; };
debug!("returns: {:?}", task);
Ok(task.into()) Ok(task.into())
} }
@ -464,7 +468,7 @@ pub async fn delete_documents_batch(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body); debug!(parameters = ?body, "Delete documents by batch");
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
@ -479,7 +483,7 @@ pub async fn delete_documents_batch(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Delete documents by batch");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -497,7 +501,7 @@ pub async fn delete_documents_by_filter(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body); debug!(parameters = ?body, "Delete documents by filter");
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner(); let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter; let filter = body.into_inner().filter;
@ -515,7 +519,7 @@ pub async fn delete_documents_by_filter(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Delete documents by filter");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -532,7 +536,7 @@ pub async fn clear_all_documents(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Delete all documents");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }


@ -2,12 +2,12 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson; use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use serde_json::Value; use serde_json::Value;
use tracing::debug;
use crate::analytics::{Analytics, FacetSearchAggregator}; use crate::analytics::{Analytics, FacetSearchAggregator};
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@ -56,7 +56,7 @@ pub async fn search(
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.into_inner(); let query = params.into_inner();
debug!("facet search called with params: {:?}", query); debug!(parameters = ?query, "Facet search");
let mut aggregate = FacetSearchAggregator::from_query(&query, &req); let mut aggregate = FacetSearchAggregator::from_query(&query, &req);
@ -83,7 +83,7 @@ pub async fn search(
let search_result = search_result?; let search_result = search_result?;
debug!("returns: {:?}", search_result); debug!(returns = ?search_result, "Facet search");
Ok(HttpResponse::Ok().json(search_result)) Ok(HttpResponse::Ok().json(search_result))
} }


@ -5,7 +5,6 @@ use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef}; use deserr::{DeserializeError, Deserr, ValuePointerRef};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
@ -16,6 +15,7 @@ use meilisearch_types::tasks::KindWithContent;
use serde::Serialize; use serde::Serialize;
use serde_json::json; use serde_json::json;
use time::OffsetDateTime; use time::OffsetDateTime;
use tracing::debug;
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics; use crate::analytics::Analytics;
@ -93,6 +93,7 @@ pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>, index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>, paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?paginate, "List indexes");
let filters = index_scheduler.filters(); let filters = index_scheduler.filters();
let indexes: Vec<Option<IndexView>> = let indexes: Vec<Option<IndexView>> =
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> { index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
@ -105,7 +106,7 @@ pub async fn list_indexes(
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect(); let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter()); let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
debug!("returns: {:?}", ret); debug!(returns = ?ret, "List indexes");
Ok(HttpResponse::Ok().json(ret)) Ok(HttpResponse::Ok().json(ret))
} }
@ -124,6 +125,7 @@ pub async fn create_index(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
let IndexCreateRequest { primary_key, uid } = body.into_inner(); let IndexCreateRequest { primary_key, uid } = body.into_inner();
let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid);
@ -137,6 +139,7 @@ pub async fn create_index(
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Create index");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} else { } else {
@ -177,7 +180,7 @@ pub async fn get_index(
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let index_view = IndexView::new(index_uid.into_inner(), &index)?; let index_view = IndexView::new(index_uid.into_inner(), &index)?;
debug!("returns: {:?}", index_view); debug!(returns = ?index_view, "Get index");
Ok(HttpResponse::Ok().json(index_view)) Ok(HttpResponse::Ok().json(index_view))
} }
@ -189,7 +192,7 @@ pub async fn update_index(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body); debug!(parameters = ?body, "Update index");
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner(); let body = body.into_inner();
analytics.publish( analytics.publish(
@ -206,7 +209,7 @@ pub async fn update_index(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Update index");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -218,6 +221,7 @@ pub async fn delete_index(
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete index");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -255,6 +259,6 @@ pub async fn get_index_stats(
let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?); let stats = IndexStats::from(index_scheduler.index_stats(&index_uid)?);
debug!("returns: {:?}", stats); debug!(returns = ?stats, "Get index stats");
Ok(HttpResponse::Ok().json(stats)) Ok(HttpResponse::Ok().json(stats))
} }

View File

@ -2,7 +2,6 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::{debug, warn};
use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::deserr_codes::*;
@ -12,6 +11,7 @@ use meilisearch_types::milli;
use meilisearch_types::milli::vector::DistributionShift; use meilisearch_types::milli::vector::DistributionShift;
use meilisearch_types::serde_cs::vec::CS; use meilisearch_types::serde_cs::vec::CS;
use serde_json::Value; use serde_json::Value;
use tracing::{debug, warn};
use crate::analytics::{Analytics, SearchAggregator}; use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@ -186,7 +186,7 @@ pub async fn search_with_url_query(
req: HttpRequest, req: HttpRequest,
analytics: web::Data<dyn Analytics>, analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> { ) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params); debug!(parameters = ?params, "Search get");
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().into(); let mut query: SearchQuery = params.into_inner().into();
@ -213,7 +213,7 @@ pub async fn search_with_url_query(
let search_result = search_result?; let search_result = search_result?;
debug!("returns: {:?}", search_result); debug!(returns = ?search_result, "Search get");
Ok(HttpResponse::Ok().json(search_result)) Ok(HttpResponse::Ok().json(search_result))
} }
@ -227,7 +227,7 @@ pub async fn search_with_post(
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query = params.into_inner(); let mut query = params.into_inner();
debug!("search called with params: {:?}", query); debug!(parameters = ?query, "Search post");
// Tenant token search_rules. // Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@ -252,7 +252,7 @@ pub async fn search_with_post(
let search_result = search_result?; let search_result = search_result?;
debug!("returns: {:?}", search_result); debug!(returns = ?search_result, "Search post");
Ok(HttpResponse::Ok().json(search_result)) Ok(HttpResponse::Ok().json(search_result))
} }


@ -2,7 +2,6 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson; use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::facet_values_sort::FacetValuesSort;
@ -11,6 +10,7 @@ use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked}; use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use serde_json::json; use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@ -24,12 +24,12 @@ macro_rules! make_setting_route {
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse, Resource}; use actix_web::{web, HttpRequest, HttpResponse, Resource};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid; use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::update::Setting;
use meilisearch_types::settings::{settings, Settings}; use meilisearch_types::settings::{settings, Settings};
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use tracing::debug;
use $crate::analytics::Analytics; use $crate::analytics::Analytics;
use $crate::extractors::authentication::policies::*; use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData; use $crate::extractors::authentication::GuardedData;
@ -61,7 +61,7 @@ macro_rules! make_setting_route {
.await?? .await??
.into(); .into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Delete settings");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -78,6 +78,7 @@ macro_rules! make_setting_route {
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let body = body.into_inner(); let body = body.into_inner();
debug!(parameters = ?body, "Update settings");
#[allow(clippy::redundant_closure_call)] #[allow(clippy::redundant_closure_call)]
$analytics(&body, &req); $analytics(&body, &req);
@ -109,7 +110,7 @@ macro_rules! make_setting_route {
.await?? .await??
.into(); .into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Update settings");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -126,7 +127,7 @@ macro_rules! make_setting_route {
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let settings = settings(&index, &rtxn)?; let settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", settings); debug!(returns = ?settings, "Update settings");
let mut json = serde_json::json!(&settings); let mut json = serde_json::json!(&settings);
let val = json[$camelcase_attr].take(); let val = json[$camelcase_attr].take();
@ -656,6 +657,7 @@ pub async fn update_all(
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let new_settings = body.into_inner(); let new_settings = body.into_inner();
debug!(parameters = ?new_settings, "Update all settings");
let new_settings = validate_settings(new_settings, &index_scheduler)?; let new_settings = validate_settings(new_settings, &index_scheduler)?;
analytics.publish( analytics.publish(
@ -768,7 +770,7 @@ pub async fn update_all(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Update all settings");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }
@ -781,7 +783,7 @@ pub async fn get_all(
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let rtxn = index.read_txn()?; let rtxn = index.read_txn()?;
let new_settings = settings(&index, &rtxn)?; let new_settings = settings(&index, &rtxn)?;
debug!("returns: {:?}", new_settings); debug!(returns = ?new_settings, "Get all settings");
Ok(HttpResponse::Ok().json(new_settings)) Ok(HttpResponse::Ok().json(new_settings))
} }
@ -804,7 +806,7 @@ pub async fn delete_all(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Delete all settings");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }


@ -0,0 +1,281 @@
use std::convert::Infallible;
use std::io::Write;
use std::ops::ControlFlow;
use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use actix_web::web::{Bytes, Data};
use actix_web::{web, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use futures_util::Stream;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use tokio::sync::mpsc;
use tracing_subscriber::filter::Targets;
use tracing_subscriber::Layer;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::LogRouteHandle;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("stream")
.route(web::post().to(SeqHandler(get_logs)))
.route(web::delete().to(SeqHandler(cancel_logs))),
);
}
#[derive(Debug, Default, Clone, Copy, Deserr, PartialEq, Eq)]
#[deserr(rename_all = camelCase)]
pub enum LogMode {
#[default]
Human,
Profile,
}
/// Simple wrapper around the `Targets` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Clone, Debug)]
struct MyTargets(Targets);
/// Simple wrapper around the `ParseError` from `tracing_subscriber` to implement `MergeWithError` on it.
#[derive(Debug, thiserror::Error)]
enum MyParseError {
#[error(transparent)]
ParseError(#[from] tracing_subscriber::filter::ParseError),
#[error(
"Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`"
)]
Example,
}
impl FromStr for MyTargets {
type Err = MyParseError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
if s.is_empty() {
Err(MyParseError::Example)
} else {
Ok(MyTargets(Targets::from_str(s).map_err(MyParseError::ParseError)?))
}
}
}
impl MergeWithError<MyParseError> for DeserrJsonError<BadRequest> {
fn merge(
_self_: Option<Self>,
other: MyParseError,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
Self::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: other.to_string() },
merge_location,
)
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields, validate = validate_get_logs -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct GetLogs {
#[deserr(default = "info".parse().unwrap(), try_from(&String) = MyTargets::from_str -> DeserrJsonError<BadRequest>)]
target: MyTargets,
#[deserr(default, error = DeserrJsonError<BadRequest>)]
mode: LogMode,
#[deserr(default = false, error = DeserrJsonError<BadRequest>)]
profile_memory: bool,
}
fn validate_get_logs<E: DeserializeError>(
logs: GetLogs,
location: ValuePointerRef,
) -> Result<GetLogs, E> {
if logs.profile_memory && logs.mode != LogMode::Profile {
Err(deserr::take_cf_content(E::error::<Infallible>(
None,
ErrorKind::Unexpected {
msg: format!("`profile_memory` can only be used while profiling code and is not compatible with the {:?} mode.", logs.mode),
},
location,
)))
} else {
Ok(logs)
}
}
struct LogWriter {
sender: mpsc::UnboundedSender<Vec<u8>>,
}
impl Write for LogWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.sender.send(buf.to_vec()).map_err(std::io::Error::other)?;
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
struct HandleGuard {
/// We need to keep a handle on the logs to make the route available again when the streamer is dropped
logs: Arc<LogRouteHandle>,
}
impl Drop for HandleGuard {
fn drop(&mut self) {
if let Err(e) = self.logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
}
}
fn byte_stream(
receiver: mpsc::UnboundedReceiver<Vec<u8>>,
guard: HandleGuard,
) -> impl futures_util::Stream<Item = Result<Bytes, ResponseError>> {
futures_util::stream::unfold((receiver, guard), move |(mut receiver, guard)| async move {
let vec = receiver.recv().await;
vec.map(From::from).map(Ok).map(|a| (a, (receiver, guard)))
})
}
type PinnedByteStream = Pin<Box<dyn Stream<Item = Result<Bytes, ResponseError>>>>;
fn make_layer<
S: tracing::Subscriber + for<'span> tracing_subscriber::registry::LookupSpan<'span>,
>(
opt: &GetLogs,
logs: Data<LogRouteHandle>,
) -> (Box<dyn Layer<S> + Send + Sync>, PinnedByteStream) {
let guard = HandleGuard { logs: logs.into_inner() };
match opt.mode {
LogMode::Human => {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let fmt_layer = tracing_subscriber::fmt::layer()
.with_writer(move || LogWriter { sender: sender.clone() })
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE);
let stream = byte_stream(receiver, guard);
(Box::new(fmt_layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
LogMode::Profile => {
let (trace, layer) = tracing_trace::Trace::new(opt.profile_memory);
let stream = entry_stream(trace, guard);
(Box::new(layer) as Box<dyn Layer<S> + Send + Sync>, Box::pin(stream))
}
}
}
fn entry_stream(
trace: tracing_trace::Trace,
guard: HandleGuard,
) -> impl Stream<Item = Result<Bytes, ResponseError>> {
let receiver = trace.into_receiver();
let entry_buf = Vec::new();
futures_util::stream::unfold(
(receiver, entry_buf, guard),
move |(mut receiver, mut entry_buf, guard)| async move {
let mut bytes = Vec::new();
while bytes.len() < 8192 {
entry_buf.clear();
let Ok(count) = tokio::time::timeout(
std::time::Duration::from_secs(1),
receiver.recv_many(&mut entry_buf, 100),
)
.await
else {
break;
};
if count == 0 {
if !bytes.is_empty() {
break;
}
// channel closed, exit
return None;
}
for entry in &entry_buf {
if let Err(error) = serde_json::to_writer(&mut bytes, entry) {
tracing::error!(
error = &error as &dyn std::error::Error,
"deserializing entry"
);
return Some((
Err(ResponseError::from_msg(
format!("error deserializing entry: {error}"),
Code::Internal,
)),
(receiver, entry_buf, guard),
));
}
}
}
Some((Ok(bytes.into()), (receiver, entry_buf, guard)))
},
)
}
pub async fn get_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
body: AwebJson<GetLogs, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
let opt = body.into_inner();
let mut stream = None;
logs.modify(|layer| match layer.inner_mut() {
None => {
// there is no one getting logs
*layer.filter_mut() = opt.target.0.clone();
let (new_layer, new_stream) = make_layer(&opt, logs.clone());
*layer.inner_mut() = Some(new_layer);
stream = Some(new_stream);
}
Some(_) => {
// there is already someone getting logs
}
})
.unwrap();
if let Some(stream) = stream {
Ok(HttpResponse::Ok().streaming(stream))
} else {
Err(MeilisearchHttpError::AlreadyUsedLogRoute.into())
}
}
pub async fn cancel_logs(
index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
logs: Data<LogRouteHandle>,
) -> Result<HttpResponse, ResponseError> {
index_scheduler.features().check_logs_route()?;
if let Err(e) = logs.modify(|layer| *layer.inner_mut() = None) {
tracing::error!("Could not free the logs route: {e}");
}
Ok(HttpResponse::NoContent().finish())
}
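To make the ownership story in `byte_stream`/`entry_stream` above more concrete, here is a stripped-down sketch of the `stream::unfold` plus guard pattern (the `Guard` type is a stand-in for `HandleGuard`; the sketch assumes `tokio` with the `macros` and `rt-multi-thread` features and `futures-util`). Because the guard travels inside the stream's state, ending or dropping the HTTP response stream is what frees the `/logs/stream` route again:

```rust
use futures_util::StreamExt;
use tokio::sync::mpsc;

/// Stands in for `HandleGuard`: dropping it is where the real code frees the route.
struct Guard;

impl Drop for Guard {
    fn drop(&mut self) {
        println!("guard dropped: this is where the logs route would be freed");
    }
}

fn byte_stream(
    receiver: mpsc::UnboundedReceiver<Vec<u8>>,
    guard: Guard,
) -> impl futures_util::Stream<Item = Vec<u8>> {
    // The guard travels inside the stream's state, so it is released exactly when the
    // stream ends or is dropped (e.g. when the HTTP client disconnects).
    futures_util::stream::unfold((receiver, guard), |(mut receiver, guard)| async move {
        receiver.recv().await.map(|chunk| (chunk, (receiver, guard)))
    })
}

#[tokio::main]
async fn main() {
    let (sender, receiver) = mpsc::unbounded_channel();
    sender.send(b"hello".to_vec()).unwrap();
    drop(sender); // closing the sender ends the stream

    let mut stream = Box::pin(byte_stream(receiver, Guard));
    while let Some(chunk) = stream.next().await {
        println!("received {} bytes", chunk.len());
    }
}
```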


@ -3,7 +3,6 @@ use std::collections::BTreeMap;
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::settings::{Settings, Unchecked};
@ -11,6 +10,7 @@ use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::json; use serde_json::json;
use time::OffsetDateTime; use time::OffsetDateTime;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@ -22,6 +22,7 @@ mod api_key;
mod dump; mod dump;
pub mod features; pub mod features;
pub mod indexes; pub mod indexes;
mod logs;
mod metrics; mod metrics;
mod multi_search; mod multi_search;
mod snapshot; mod snapshot;
@ -31,6 +32,7 @@ pub mod tasks;
pub fn configure(cfg: &mut web::ServiceConfig) { pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::scope("/tasks").configure(tasks::configure)) cfg.service(web::scope("/tasks").configure(tasks::configure))
.service(web::resource("/health").route(web::get().to(get_health))) .service(web::resource("/health").route(web::get().to(get_health)))
.service(web::scope("/logs").configure(logs::configure))
.service(web::scope("/keys").configure(api_key::configure)) .service(web::scope("/keys").configure(api_key::configure))
.service(web::scope("/dumps").configure(dump::configure)) .service(web::scope("/dumps").configure(dump::configure))
.service(web::scope("/snapshots").configure(snapshot::configure)) .service(web::scope("/snapshots").configure(snapshot::configure))
@ -250,7 +252,7 @@ async fn get_stats(
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?; let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
debug!("returns: {:?}", stats); debug!(returns = ?stats, "Get stats");
Ok(HttpResponse::Ok().json(stats)) Ok(HttpResponse::Ok().json(stats))
} }


@ -3,11 +3,11 @@ use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse}; use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson; use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions; use meilisearch_types::keys::actions;
use serde::Serialize; use serde::Serialize;
use tracing::debug;
use crate::analytics::{Analytics, MultiSearchAggregator}; use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::policies::ActionPolicy;
@ -52,7 +52,7 @@ pub async fn multi_search_with_post(
for (query_index, (index_uid, mut query)) in for (query_index, (index_uid, mut query)) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate() queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
{ {
debug!("multi-search #{query_index}: called with params: {:?}", query); debug!(on_index = query_index, parameters = ?query, "Multi-search");
// Check index from API key // Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) { if !index_scheduler.filters().is_index_authorized(&index_uid) {
@ -107,7 +107,7 @@ pub async fn multi_search_with_post(
err err
})?; })?;
debug!("returns: {:?}", search_results); debug!(returns = ?search_results, "Multi-search");
Ok(HttpResponse::Ok().json(SearchResults { results: search_results })) Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
} }


@ -1,10 +1,10 @@
use actix_web::web::Data; use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse}; use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::tasks::KindWithContent; use meilisearch_types::tasks::KindWithContent;
use serde_json::json; use serde_json::json;
use tracing::debug;
use crate::analytics::Analytics; use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*; use crate::extractors::authentication::policies::*;
@ -27,6 +27,6 @@ pub async fn create_snapshot(
let task: SummarizedTaskView = let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task); debug!(returns = ?task, "Create snapshot");
Ok(HttpResponse::Accepted().json(task)) Ok(HttpResponse::Accepted().json(task))
} }


@ -59,6 +59,8 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"}, ("POST", "/snapshots") => hashset!{"snapshots.create", "snapshots.*", "*"},
("GET", "/version") => hashset!{"version", "*"}, ("GET", "/version") => hashset!{"version", "*"},
("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"}, ("GET", "/metrics") => hashset!{"metrics.get", "metrics.*", "*"},
("POST", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("DELETE", "/logs/stream") => hashset!{"metrics.get", "metrics.*", "*"},
("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"}, ("PATCH", "/keys/mykey/") => hashset!{"keys.update", "*"},
("GET", "/keys/mykey/") => hashset!{"keys.get", "*"}, ("GET", "/keys/mykey/") => hashset!{"keys.get", "*"},
("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"}, ("DELETE", "/keys/mykey/") => hashset!{"keys.delete", "*"},


@ -13,6 +13,8 @@ use meilisearch::{analytics, create_app, setup_meilisearch};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use tempfile::TempDir; use tempfile::TempDir;
use tokio::time::sleep; use tokio::time::sleep;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use super::index::Index; use super::index::Index;
use super::service::Service; use super::service::Service;
@ -81,10 +83,16 @@ impl Server {
Response = ServiceResponse<impl MessageBody>, Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error, Error = actix_web::Error,
> { > {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
actix_web::test::init_service(create_app( actix_web::test::init_service(create_app(
self.service.index_scheduler.clone().into(), self.service.index_scheduler.clone().into(),
self.service.auth.clone().into(), self.service.auth.clone().into(),
self.service.options.clone(), self.service.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&self.service.options), analytics::MockAnalytics::new(&self.service.options),
true, true,
)) ))


@ -7,6 +7,8 @@ use actix_web::test::TestRequest;
use index_scheduler::IndexScheduler; use index_scheduler::IndexScheduler;
use meilisearch::{analytics, create_app, Opt}; use meilisearch::{analytics, create_app, Opt};
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use tracing::level_filters::LevelFilter;
use tracing_subscriber::Layer;
use crate::common::encoder::Encoder; use crate::common::encoder::Encoder;
use crate::common::Value; use crate::common::Value;
@ -105,10 +107,16 @@ impl Service {
} }
pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) {
let (_route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let app = test::init_service(create_app( let app = test::init_service(create_app(
self.index_scheduler.clone().into(), self.index_scheduler.clone().into(),
self.auth.clone().into(), self.auth.clone().into(),
self.options.clone(), self.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&self.options), analytics::MockAnalytics::new(&self.options),
true, true,
)) ))


@ -1847,6 +1847,7 @@ async fn import_dump_v6_containing_experimental_features() {
{ {
"vectorStore": false, "vectorStore": false,
"metrics": false, "metrics": false,
"logsRoute": false,
"exportPuffinReports": false "exportPuffinReports": false
} }
"###); "###);


@ -20,6 +20,7 @@ async fn experimental_features() {
{ {
"vectorStore": false, "vectorStore": false,
"metrics": false, "metrics": false,
"logsRoute": false,
"exportPuffinReports": false "exportPuffinReports": false
} }
"###); "###);
@ -31,6 +32,7 @@ async fn experimental_features() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false,
"exportPuffinReports": false "exportPuffinReports": false
} }
"###); "###);
@ -42,6 +44,7 @@ async fn experimental_features() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false,
"exportPuffinReports": false "exportPuffinReports": false
} }
"###); "###);
@ -54,6 +57,7 @@ async fn experimental_features() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false,
"exportPuffinReports": false "exportPuffinReports": false
} }
"###); "###);
@ -66,6 +70,7 @@ async fn experimental_features() {
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false,
"exportPuffinReports": false "exportPuffinReports": false
} }
"###); "###);
@ -85,6 +90,7 @@ async fn experimental_feature_metrics() {
{ {
"vectorStore": false, "vectorStore": false,
"metrics": true, "metrics": true,
"logsRoute": false,
"exportPuffinReports": false "exportPuffinReports": false
} }
"###); "###);
@ -140,7 +146,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###" meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{ {
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `exportPuffinReports`", "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`",
"code": "bad_request", "code": "bad_request",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request" "link": "https://docs.meilisearch.com/errors#bad_request"


@ -5,6 +5,7 @@ mod documents;
mod dumps; mod dumps;
mod features; mod features;
mod index; mod index;
mod logs;
mod search; mod search;
mod settings; mod settings;
mod snapshot; mod snapshot;


@ -0,0 +1,182 @@
use meili_snap::*;
use crate::common::Server;
use crate::json;
#[actix_rt::test]
async fn logs_stream_bad_target() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "target": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.target`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Our help message
let (response, code) = server.service.post("/logs/stream", json!({ "target": "" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: Empty string is not a valid target. If you want to get no logs use `OFF`. Usage: `info`, `meilisearch=info`, or you can write multiple filters in one target: `index_scheduler=info,milli=trace`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// An error from the target parser
let (response, code) = server.service.post("/logs/stream", json!({ "target": "==" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value at `.target`: invalid filter directive: too many '=' in filter directive, expected 0 or 1",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_mode() {
let server = Server::new().await;
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found a boolean: `true`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) = server.service.post("/logs/stream", json!({ "mode": [] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.mode`: expected a string, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong value
let (response, code) = server.service.post("/logs/stream", json!({ "mode": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `tamo` at `.mode`: expected one of `human`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_bad_profile_memory() {
let server = Server::new().await;
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": "tamo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found a string: `\"tamo\"`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Wrong type
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": ["hello", "kefir"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value type at `.profileMemory`: expected a boolean, but found an array: `[\"hello\",\"kefir\"]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// Used with default parameters
let (response, code) =
server.service.post("/logs/stream", json!({ "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value: `profile_memory` can only be used while profiling code and is not compatible with the Human mode.",
"code": "invalid_settings_typo_tolerance",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_typo_tolerance"
}
"###);
// Used with an unsupported mode
let (response, code) =
server.service.post("/logs/stream", json!({ "mode": "fmt", "profileMemory": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown value `fmt` at `.mode`: expected one of `human`, `profile`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn logs_stream_without_enabling_the_route() {
let server = Server::new().await;
let (response, code) = server.service.post("/logs/stream", json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) = server.service.delete("/logs/stream").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "getting logs through the `/logs/stream` route requires enabling the `logs route` experimental feature. See https://github.com/orgs/meilisearch/discussions/721",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}
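
Taken together, these validation tests pin down what a well-formed request looks like. As a hedged illustration only (the route additionally requires the `logsRoute` experimental feature, and the real response is a stream, so this sketches the payload shape rather than a full test):

```rust
// Sketch built from the error messages above: `mode` must be `human` or
// `profile`, `target` uses the `crate=level` filter syntax, and
// `profileMemory` is only accepted together with the `profile` mode.
// `server` is the same test helper used elsewhere in this file.
let (response, code) = server
    .service
    .post(
        "/logs/stream",
        json!({
            "mode": "profile",
            "target": "index_scheduler=info,milli=trace",
            "profileMemory": true
        }),
    )
    .await;
```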

View File

@ -0,0 +1,92 @@
mod error;
use std::rc::Rc;
use std::str::FromStr;
use actix_web::http::header::ContentType;
use meili_snap::snapshot;
use meilisearch::{analytics, create_app, Opt};
use tracing::level_filters::LevelFilter;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;
use crate::common::{default_settings, Server};
use crate::json;
#[actix_web::test]
async fn basic_test_log_stream_route() {
let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
experimental_enable_logs_route: true,
..default_settings(db_path.path())
})
.await
.unwrap();
let (route_layer, route_layer_handle) =
tracing_subscriber::reload::Layer::new(None.with_filter(
tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF),
));
let subscriber = tracing_subscriber::registry().with(route_layer).with(
tracing_subscriber::fmt::layer()
.with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE)
.with_filter(tracing_subscriber::filter::LevelFilter::from_str("INFO").unwrap()),
);
let app = actix_web::test::init_service(create_app(
server.service.index_scheduler.clone().into(),
server.service.auth.clone().into(),
server.service.options.clone(),
route_layer_handle,
analytics::MockAnalytics::new(&server.service.options),
true,
))
.await;
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
let app = Rc::new(app);
// First, we start listening on the `/logs/stream` route
let handle_app = app.clone();
let handle = tokio::task::spawn_local(async move {
let req = actix_web::test::TestRequest::post()
.uri("/logs/stream")
.insert_header(ContentType::json())
.set_payload(
serde_json::to_vec(&json!({
"mode": "human",
"target": "info",
}))
.unwrap(),
);
let req = req.to_request();
let ret = actix_web::test::call_service(&*handle_app, req).await;
actix_web::test::read_body(ret).await
});
// We're going to create an index to get at least one info log saying we processed a batch of tasks
let (ret, _code) = server.create_index(json!({ "uid": "tamo" })).await;
snapshot!(ret, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
server.wait_task(ret.uid()).await;
let req = actix_web::test::TestRequest::delete().uri("/logs/stream");
let req = req.to_request();
let ret = actix_web::test::call_service(&*app, req).await;
let code = ret.status();
snapshot!(code, @"204 No Content");
let logs = handle.await.unwrap();
let logs = String::from_utf8(logs.to_vec()).unwrap();
assert!(logs.contains("INFO"), "{logs}");
}
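
The key mechanism this test wires up is a `tracing_subscriber` reload layer whose handle is handed to `create_app`, so the `/logs/stream` route can swap the active filter at runtime. A minimal standalone sketch of that reload pattern, following the stock `tracing-subscriber` 0.3 API and independent of any Meilisearch types:

```rust
use tracing::info;
use tracing_subscriber::{filter, fmt, prelude::*, reload};

fn main() {
    // Start with everything filtered out, but keep a handle on the filter.
    let (level, handle) = reload::Layer::new(filter::LevelFilter::OFF);
    tracing_subscriber::registry().with(level).with(fmt::Layer::default()).init();

    info!("dropped: the filter is OFF");
    // Later (e.g. when a client opens /logs/stream) the handle swaps the
    // filter in place, without rebuilding the subscriber.
    handle.modify(|f| *f = filter::LevelFilter::INFO).unwrap();
    info!("kept: the filter is now INFO");
}
```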

View File

@ -15,6 +15,7 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
{ {
"vectorStore": true, "vectorStore": true,
"metrics": false, "metrics": false,
"logsRoute": false,
"exportPuffinReports": false "exportPuffinReports": false
} }
"###); "###);

View File

@ -44,10 +44,6 @@ struct WebhookHandle {
} }
async fn create_webhook_server() -> WebhookHandle { async fn create_webhook_server() -> WebhookHandle {
let mut log_builder = env_logger::Builder::new();
log_builder.parse_filters("info");
log_builder.init();
let (sender, receiver) = mpsc::unbounded_channel(); let (sender, receiver) = mpsc::unbounded_channel();
let sender = Arc::new(sender); let sender = Arc::new(sender);

View File

@ -71,7 +71,6 @@ itertools = "0.11.0"
puffin = "0.16.0" puffin = "0.16.0"
# logging # logging
log = "0.4.20"
logging_timer = "1.1.0" logging_timer = "1.1.0"
csv = "1.3.0" csv = "1.3.0"
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" } candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
@ -91,6 +90,7 @@ tiktoken-rs = "0.5.8"
liquid = "0.26.4" liquid = "0.26.4"
arroy = "0.2.0" arroy = "0.2.0"
rand = "0.8.5" rand = "0.8.5"
tracing = "0.1.40"
[dev-dependencies] [dev-dependencies]
mimalloc = { version = "0.1.39", default-features = false } mimalloc = { version = "0.1.39", default-features = false }
@ -102,15 +102,7 @@ meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] } rand = { version = "0.8.5", features = ["small_rng"] }
[features] [features]
all-tokenizations = [ all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
"charabia/chinese",
"charabia/hebrew",
"charabia/japanese",
"charabia/thai",
"charabia/korean",
"charabia/greek",
"charabia/khmer",
]
# Use POSIX semaphores instead of SysV semaphores in LMDB # Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml # For more information on this feature, see heed's Cargo.toml

View File

@ -25,6 +25,7 @@ impl<R: io::Read + io::Seek> DocumentsBatchReader<R> {
/// ///
/// It first retrieves the index, then moves to the first document. Use the `into_cursor` /// It first retrieves the index, then moves to the first document. Use the `into_cursor`
/// method to iterator over the documents, from the first to the last. /// method to iterator over the documents, from the first to the last.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn from_reader(reader: R) -> Result<Self, Error> { pub fn from_reader(reader: R) -> Result<Self, Error> {
let reader = grenad::Reader::new(reader)?; let reader = grenad::Reader::new(reader)?;
let mut cursor = reader.into_cursor()?; let mut cursor = reader.into_cursor()?;
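
This is the first of many places in the diff where a function either gains `#[tracing::instrument]` or has its `#[logging_timer::time]` attribute replaced by it. A hedged sketch of what the attribute does, with a made-up function standing in for the real ones:

```rust
// Illustrative only: every call now opens a `trace`-level span under the
// given target, `skip_all` keeps the (potentially huge) arguments out of the
// recorded fields, and any event emitted inside the body is attached to the span.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
fn read_batch(bytes: &[u8]) -> usize {
    tracing::trace!(len = bytes.len(), "reading a batch");
    bytes.len()
}
```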

View File

@ -6,9 +6,9 @@ use charabia::Normalize;
use fst::automaton::{Automaton, Str}; use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer}; use fst::{IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use log::error;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap; use roaring::bitmap::RoaringBitmap;
use tracing::error;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};

View File

@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
continue; continue;
} }
let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
let entered = span.enter();
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket( let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
ctx, ctx,
logger, logger,
@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
back!(); back!();
continue; continue;
}; };
drop(entered);
ranking_rule_scores.push(next_bucket.score); ranking_rule_scores.push(next_bucket.score);
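
Here the span cannot be expressed as an attribute because it must end partway through the loop body, so the code uses the manual guard pattern that recurs throughout the rest of the diff. A small sketch of that pattern (hypothetical loop, real `tracing` API):

```rust
// Dropping the guard explicitly ends the entered scope as soon as the
// measured work is done, instead of letting the span cover the whole block.
for id in ["words", "typo", "proximity"] {
    let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = id);
    let entered = span.enter();
    // ... the work attributed to the `next_bucket` span ...
    drop(entered);
    // ... later work in the same iteration is *not* measured by the span ...
}
```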

View File

@ -14,6 +14,12 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
ClearDocuments { wtxn, index } ClearDocuments { wtxn, index }
} }
#[tracing::instrument(
level = "trace",
skip(self),
target = "indexing::documents",
name = "clear_documents"
)]
pub fn execute(self) -> Result<u64> { pub fn execute(self) -> Result<u64> {
puffin::profile_function!(); puffin::profile_function!();

View File

@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption};
use grenad::{CompressionType, SortAlgorithm}; use grenad::{CompressionType, SortAlgorithm};
use heed::types::{Bytes, DecodeIgnore, SerdeJson}; use heed::types::{Bytes, DecodeIgnore, SerdeJson};
use heed::BytesEncode; use heed::BytesEncode;
use log::debug;
use time::OffsetDateTime; use time::OffsetDateTime;
use tracing::debug;
use self::incremental::FacetsUpdateIncremental; use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk; use super::FacetsUpdateBulk;

View File

@ -22,6 +22,7 @@ use crate::{FieldId, Index, Result};
/// # Panics /// # Panics
/// ///
/// - if reader.is_empty(), this function may panic in some cases /// - if reader.is_empty(), this function may panic in some cases
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn enrich_documents_batch<R: Read + Seek>( pub fn enrich_documents_batch<R: Read + Seek>(
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
index: &Index, index: &Index,
@ -77,7 +78,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
}, },
[] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
[(field_id, name)] => { [(field_id, name)] => {
log::info!("Primary key was not specified in index. Inferred to '{name}'"); tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
PrimaryKey::Flat { name, field_id: *field_id } PrimaryKey::Flat { name, field_id: *field_id }
} }
multiple => { multiple => {
@ -143,6 +144,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
/// Retrieve the document id after validating it, returning a `UserError` /// Retrieve the document id after validating it, returning a `UserError`
/// if the id is invalid or can't be guessed. /// if the id is invalid or can't be guessed.
#[tracing::instrument(level = "trace", skip(uuid_buffer, documents_batch_index, document)
target = "indexing::documents")]
fn fetch_or_generate_document_id( fn fetch_or_generate_document_id(
document: &obkv::KvReader<FieldId>, document: &obkv::KvReader<FieldId>,
documents_batch_index: &DocumentsBatchIndex, documents_batch_index: &DocumentsBatchIndex,

View File

@ -21,7 +21,7 @@ pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, R
/// ///
/// Returns the generated internal documents ids and a grenad reader /// Returns the generated internal documents ids and a grenad reader
/// with the list of extracted words from the given chunk of documents. /// with the list of extracted words from the given chunk of documents.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_docid_word_positions<R: io::Read + io::Seek>( pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>, obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,

View File

@ -16,7 +16,7 @@ use crate::Result;
/// ///
/// Returns a grenad reader with the list of extracted facet numbers and /// Returns a grenad reader with the list of extracted facet numbers and
/// documents ids from the given chunk of docid facet number positions. /// documents ids from the given chunk of docid facet number positions.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_facet_number_docids<R: io::Read + io::Seek>( pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
fid_docid_facet_number: grenad::Reader<R>, fid_docid_facet_number: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,

View File

@ -15,7 +15,7 @@ use crate::{FieldId, Result};
/// ///
/// Returns a grenad reader with the list of extracted facet strings and /// Returns a grenad reader with the list of extracted facet strings and
/// documents ids from the given chunk of docid facet string positions. /// documents ids from the given chunk of docid facet string positions.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_facet_string_docids<R: io::Read + io::Seek>( pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>, docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,

View File

@ -39,7 +39,7 @@ pub struct ExtractedFacetValues {
/// Returns the generated grenad reader containing the docid, the fid and the original value as key /// and the normalized value as value extracted from the given chunk of documents.
/// and the normalized value as value extracted from the given chunk of documents. /// and the normalized value as value extracted from the given chunk of documents.
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially. /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>, obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,
@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
if let Ok(float) = original.parse() { if let Ok(float) = original.parse() {
output_numbers.push(float); output_numbers.push(float);
} else { } else {
log::warn!( tracing::warn!(
"Internal error, could not parse a geofield that has been validated. Please open an issue." "Internal error, could not parse a geofield that has been validated. Please open an issue."
) )
} }

View File

@ -19,7 +19,7 @@ const MAX_COUNTED_WORDS: usize = 30;
/// ///
/// Returns a grenad reader with the list of extracted field id word counts /// Returns a grenad reader with the list of extracted field id word counts
/// and documents ids from the given chunk of docid word positions. /// and documents ids from the given chunk of docid word positions.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>( pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>, docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,

View File

@ -13,7 +13,7 @@ use crate::{FieldId, InternalError, Result};
/// Extracts the geographical coordinates contained in each document under the `_geo` field. /// Extracts the geographical coordinates contained in each document under the `_geo` field.
/// ///
/// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude) /// Returns the generated grenad reader containing the docid as key associated to the (latitude, longitude)
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_geo_points<R: io::Read + io::Seek>( pub fn extract_geo_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>, obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,

View File

@ -67,7 +67,7 @@ impl VectorStateDelta {
/// Extracts the embedding vector contained in each document under the `_vectors` field. /// Extracts the embedding vector contained in each document under the `_vectors` field.
/// ///
/// Returns the generated grenad reader containing the docid as key associated to the Vec<f32> /// Returns the generated grenad reader containing the docid as key associated to the Vec<f32>
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_vector_points<R: io::Read + io::Seek>( pub fn extract_vector_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>, obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,
@ -186,12 +186,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default(); prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default();
let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?; let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?;
if old_prompt != new_prompt { if old_prompt != new_prompt {
log::trace!( tracing::trace!(
"🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
); );
VectorStateDelta::NowGenerated(new_prompt) VectorStateDelta::NowGenerated(new_prompt)
} else { } else {
log::trace!("⏭️ Prompt unmodified, skipping"); tracing::trace!("⏭️ Prompt unmodified, skipping");
VectorStateDelta::NoChange VectorStateDelta::NoChange
} }
} else { } else {

View File

@ -23,7 +23,7 @@ use crate::{DocumentId, FieldId, Result};
/// ///
/// The first returned reader is the one for normal word_docids, and the second one is for /// The first returned reader is the one for normal word_docids, and the second one is for
/// exact_word_docids /// exact_word_docids
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_word_docids<R: io::Read + io::Seek>( pub fn extract_word_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>, docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,
@ -135,6 +135,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
)) ))
} }
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn words_into_sorter( fn words_into_sorter(
document_id: DocumentId, document_id: DocumentId,
fid: FieldId, fid: FieldId,

View File

@ -19,7 +19,7 @@ use crate::{DocumentId, Result};
/// ///
/// Returns a grenad reader with the list of extracted word pairs proximities and /// Returns a grenad reader with the list of extracted word pairs proximities and
/// documents ids from the given chunk of docid word positions. /// documents ids from the given chunk of docid word positions.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>( pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>, docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,
@ -59,6 +59,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if current_document_id.map_or(false, |id| id != document_id) { if current_document_id.map_or(false, |id| id != document_id) {
puffin::profile_scope!("Document into sorter"); puffin::profile_scope!("Document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
let _entered = span.enter();
document_word_positions_into_sorter( document_word_positions_into_sorter(
current_document_id.unwrap(), current_document_id.unwrap(),
&del_word_pair_proximity, &del_word_pair_proximity,
@ -138,6 +142,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
if let Some(document_id) = current_document_id { if let Some(document_id) = current_document_id {
puffin::profile_scope!("Final document into sorter"); puffin::profile_scope!("Final document into sorter");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter");
let _entered = span.enter();
document_word_positions_into_sorter( document_word_positions_into_sorter(
document_id, document_id,
&del_word_pair_proximity, &del_word_pair_proximity,
@ -147,6 +155,10 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
} }
{ {
puffin::profile_scope!("sorter_into_reader"); puffin::profile_scope!("sorter_into_reader");
// FIXME: span inside of a hot loop might degrade performance and create big reports
let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader");
let _entered = span.enter();
let mut writer = create_writer( let mut writer = create_writer(
indexer.chunk_compression_type, indexer.chunk_compression_type,
indexer.chunk_compression_level, indexer.chunk_compression_level,

View File

@ -18,7 +18,7 @@ use crate::{bucketed_position, DocumentId, Result};
/// ///
/// Returns a grenad reader with the list of extracted words at positions and /// Returns a grenad reader with the list of extracted words at positions and
/// documents ids from the given chunk of docid word positions. /// documents ids from the given chunk of docid word positions.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub fn extract_word_position_docids<R: io::Read + io::Seek>( pub fn extract_word_position_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>, docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,
@ -94,6 +94,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
Ok(word_position_docids_reader) Ok(word_position_docids_reader)
} }
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
fn words_position_into_sorter( fn words_position_into_sorter(
document_id: DocumentId, document_id: DocumentId,
key_buffer: &mut Vec<u8>, key_buffer: &mut Vec<u8>,

View File

@ -14,8 +14,8 @@ use std::fs::File;
use std::io::BufReader; use std::io::BufReader;
use crossbeam_channel::Sender; use crossbeam_channel::Sender;
use log::debug;
use rayon::prelude::*; use rayon::prelude::*;
use tracing::debug;
use self::extract_docid_word_positions::extract_docid_word_positions; use self::extract_docid_word_positions::extract_docid_word_positions;
use self::extract_facet_number_docids::extract_facet_number_docids; use self::extract_facet_number_docids::extract_facet_number_docids;
@ -41,6 +41,7 @@ use crate::{FieldId, FieldsIdsMap, Result};
/// Extract data for each databases from obkv documents in parallel. /// Extract data for each databases from obkv documents in parallel.
/// Send data in grenad file over provided Sender. /// Send data in grenad file over provided Sender.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
pub(crate) fn data_from_obkv_documents( pub(crate) fn data_from_obkv_documents(
original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send, original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send, flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
@ -113,7 +114,7 @@ pub(crate) fn data_from_obkv_documents(
{ {
let lmdb_writer_sx = lmdb_writer_sx.clone(); let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || { rayon::spawn(move || {
debug!("merge {} database", "facet-id-exists-docids"); debug!(database = "facet-id-exists-docids", "merge");
match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => { Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader))); let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader)));
@ -129,7 +130,7 @@ pub(crate) fn data_from_obkv_documents(
{ {
let lmdb_writer_sx = lmdb_writer_sx.clone(); let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || { rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-null-docids"); debug!(database = "facet-id-is-null-docids", "merge");
match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => { Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader))); let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
@ -145,7 +146,7 @@ pub(crate) fn data_from_obkv_documents(
{ {
let lmdb_writer_sx = lmdb_writer_sx.clone(); let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || { rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-empty-docids"); debug!(database = "facet-id-is-empty-docids", "merge");
match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) { match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => { Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader))); let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
@ -257,13 +258,22 @@ fn spawn_extraction_task<FE, FS, M>(
M: MergeableReader + FromParallelIterator<M::Output> + Send + 'static, M: MergeableReader + FromParallelIterator<M::Output> + Send + 'static,
M::Output: Send, M::Output: Send,
{ {
let current_span = tracing::Span::current();
rayon::spawn(move || { rayon::spawn(move || {
puffin::profile_scope!("extract_multiple_chunks", name); let child_span =
tracing::trace_span!(target: "", parent: &current_span, "extract_multiple_chunks");
let _entered = child_span.enter();
puffin::profile_scope!("extract_multiple_chunksdexing::details, ", name);
let chunks: Result<M> = let chunks: Result<M> =
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect(); chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
let current_span = tracing::Span::current();
rayon::spawn(move || match chunks { rayon::spawn(move || match chunks {
Ok(chunks) => { Ok(chunks) => {
debug!("merge {} database", name); let child_span = tracing::trace_span!(target: "", parent: &current_span, "merge_multiple_chunks");
let _entered = child_span.enter();
debug!(database = name, "merge");
puffin::profile_scope!("merge_multiple_chunks", name); puffin::profile_scope!("merge_multiple_chunks", name);
let reader = chunks.merge(merge_fn, &indexer); let reader = chunks.merge(merge_fn, &indexer);
let _ = lmdb_writer_sx.send(reader.map(serialize_fn)); let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
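
The pattern above, capturing `tracing::Span::current()` before handing work to rayon and passing it as an explicit `parent:`, is what keeps background work attached to the caller's span, since the worker thread itself has no entered span. A stripped-down sketch (hypothetical closure body, real `tracing`/`rayon` APIs):

```rust
// The span is captured on the calling thread and moved into the closure;
// the child span is then parented to it explicitly.
let current_span = tracing::Span::current();
rayon::spawn(move || {
    let child = tracing::trace_span!(parent: &current_span, "background_work");
    let _entered = child.enter();
    // ... work recorded under the caller's span tree ...
});
```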

View File

@ -56,6 +56,7 @@ pub fn create_sorter(
builder.build() builder.build()
} }
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn sorter_into_reader( pub fn sorter_into_reader(
sorter: grenad::Sorter<MergeFn>, sorter: grenad::Sorter<MergeFn>,
indexer: GrenadParameters, indexer: GrenadParameters,
@ -252,6 +253,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
/// Write provided sorter in database using serialize_value function. /// Write provided sorter in database using serialize_value function.
/// merge_values function is used if an entry already exist in the database. /// merge_values function is used if an entry already exist in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::grenad")]
pub fn write_sorter_into_database<K, V, FS, FM>( pub fn write_sorter_into_database<K, V, FS, FM>(
sorter: Sorter<MergeFn>, sorter: Sorter<MergeFn>,
database: &heed::Database<K, V>, database: &heed::Database<K, V>,

View File

@ -13,11 +13,11 @@ use std::result::Result as StdResult;
use crossbeam_channel::{Receiver, Sender}; use crossbeam_channel::{Receiver, Sender};
use heed::types::Str; use heed::types::Str;
use heed::Database; use heed::Database;
use log::debug;
use rand::SeedableRng; use rand::SeedableRng;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
use tracing::debug_span;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk}; use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
use self::enrich::enrich_documents_batch; use self::enrich::enrich_documents_batch;
@ -134,6 +134,7 @@ where
/// return an error and not the `IndexDocuments` struct as it is invalid to use it afterward. /// return an error and not the `IndexDocuments` struct as it is invalid to use it afterward.
/// ///
/// Returns the number of documents added to the builder. /// Returns the number of documents added to the builder.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn add_documents<R: Read + Seek>( pub fn add_documents<R: Read + Seek>(
mut self, mut self,
reader: DocumentsBatchReader<R>, reader: DocumentsBatchReader<R>,
@ -179,6 +180,7 @@ where
/// Remove a batch of documents from the current builder. /// Remove a batch of documents from the current builder.
/// ///
/// Returns the number of documents deleted from the builder. /// Returns the number of documents deleted from the builder.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn remove_documents( pub fn remove_documents(
mut self, mut self,
to_delete: Vec<String>, to_delete: Vec<String>,
@ -214,6 +216,7 @@ where
/// - No batching using the standards `remove_documents` and `add_documents` took place /// - No batching using the standards `remove_documents` and `add_documents` took place
/// ///
/// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function. /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
pub fn remove_documents_from_db_no_batch( pub fn remove_documents_from_db_no_batch(
mut self, mut self,
to_delete: &RoaringBitmap, to_delete: &RoaringBitmap,
@ -237,7 +240,12 @@ where
Ok((self, deleted_documents)) Ok((self, deleted_documents))
} }
#[logging_timer::time("IndexDocuments::{}")] #[tracing::instrument(
level = "trace"
skip_all,
target = "indexing::documents",
name = "index_documents"
)]
pub fn execute(mut self) -> Result<DocumentAdditionResult> { pub fn execute(mut self) -> Result<DocumentAdditionResult> {
puffin::profile_function!(); puffin::profile_function!();
@ -273,7 +281,12 @@ where
} }
/// Returns the total number of documents in the index after the update. /// Returns the total number of documents in the index after the update.
#[logging_timer::time("IndexDocuments::{}")] #[tracing::instrument(
level = "trace",
skip_all,
target = "profile::indexing::details",
name = "index_documents_raw"
)]
pub fn execute_raw(self, output: TransformOutput) -> Result<u64> pub fn execute_raw(self, output: TransformOutput) -> Result<u64>
where where
FP: Fn(UpdateIndexingStep) + Sync, FP: Fn(UpdateIndexingStep) + Sync,
@ -374,8 +387,12 @@ where
let cloned_embedder = self.embedders.clone(); let cloned_embedder = self.embedders.clone();
let current_span = tracing::Span::current();
// Run extraction pipeline in parallel. // Run extraction pipeline in parallel.
pool.install(|| { pool.install(|| {
let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
let _enter = child_span.enter();
puffin::profile_scope!("extract_and_send_grenad_chunks"); puffin::profile_scope!("extract_and_send_grenad_chunks");
// split obkv file into several chunks // split obkv file into several chunks
let original_chunk_iter = let original_chunk_iter =
@ -489,10 +506,7 @@ where
documents_seen: documents_seen_count as usize, documents_seen: documents_seen_count as usize,
total_documents: documents_count, total_documents: documents_count,
}); });
debug!( debug_span!("Seen", documents = documents_seen_count, total = documents_count);
"We have seen {} documents on {} total document so far",
documents_seen_count, documents_count
);
} }
if is_merged_database { if is_merged_database {
databases_seen += 1; databases_seen += 1;
@ -543,7 +557,12 @@ where
Ok(number_of_documents) Ok(number_of_documents)
} }
#[logging_timer::time("IndexDocuments::{}")] #[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "index_documents_prefix_databases"
)]
pub fn execute_prefix_databases( pub fn execute_prefix_databases(
self, self,
word_docids: Option<grenad::Reader<CursorClonableMmap>>, word_docids: Option<grenad::Reader<CursorClonableMmap>>,
@ -598,6 +617,8 @@ where
let del_prefix_fst_words; let del_prefix_fst_words;
{ {
let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs");
let _entered = span.enter();
puffin::profile_scope!("compute_prefix_diffs"); puffin::profile_scope!("compute_prefix_diffs");
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?; current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
@ -722,6 +743,12 @@ where
/// Run the word prefix docids update operation. /// Run the word prefix docids update operation.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
#[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "index_documents_word_prefix_docids"
)]
fn execute_word_prefix_docids( fn execute_word_prefix_docids(
txn: &mut heed::RwTxn, txn: &mut heed::RwTxn,
reader: grenad::Reader<Cursor<ClonableMmap>>, reader: grenad::Reader<Cursor<ClonableMmap>>,

View File

@ -146,7 +146,7 @@ impl<'a, 'i> Transform<'a, 'i> {
}) })
} }
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn read_documents<R, FP, FA>( pub fn read_documents<R, FP, FA>(
&mut self, &mut self,
reader: EnrichedDocumentsBatchReader<R>, reader: EnrichedDocumentsBatchReader<R>,
@ -359,7 +359,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db, /// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
/// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids. /// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
/// - If the document to remove was not present in either the db or the transform we do nothing. /// - If the document to remove was not present in either the db or the transform we do nothing.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
pub fn remove_documents<FA>( pub fn remove_documents<FA>(
&mut self, &mut self,
mut to_remove: Vec<String>, mut to_remove: Vec<String>,
@ -450,7 +450,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// - No batching using the standards `remove_documents` and `add_documents` took place /// - No batching using the standards `remove_documents` and `add_documents` took place
/// ///
/// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function. /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::details")]
pub fn remove_documents_from_db_no_batch<FA>( pub fn remove_documents_from_db_no_batch<FA>(
&mut self, &mut self,
to_remove: &RoaringBitmap, to_remove: &RoaringBitmap,
@ -541,6 +541,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// Flatten a document from the fields ids map contained in self and insert the new // Flatten a document from the fields ids map contained in self and insert the new
// created fields. Returns `None` if the document doesn't need to be flattened. // created fields. Returns `None` if the document doesn't need to be flattened.
#[tracing::instrument(level = "trace", skip(self, obkv), target = "indexing::transform")]
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> { fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
if obkv if obkv
.iter() .iter()
@ -661,7 +662,7 @@ impl<'a, 'i> Transform<'a, 'i> {
/// Generate the `TransformOutput` based on the given sorter that can be generated from any /// Generate the `TransformOutput` based on the given sorter that can be generated from any
/// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document /// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
/// id for the user side and the value must be an obkv where keys are valid fields ids. /// id for the user side and the value must be an obkv where keys are valid fields ids.
#[logging_timer::time] #[tracing::instrument(level = "trace", skip_all, target = "indexing::transform")]
pub(crate) fn output_from_sorter<F>( pub(crate) fn output_from_sorter<F>(
self, self,
wtxn: &mut heed::RwTxn, wtxn: &mut heed::RwTxn,

View File

@ -115,6 +115,7 @@ impl TypedChunk {
/// Write typed chunk in the corresponding LMDB database of the provided index. /// Write typed chunk in the corresponding LMDB database of the provided index.
/// Return new documents seen. /// Return new documents seen.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
pub(crate) fn write_typed_chunk_into_index( pub(crate) fn write_typed_chunk_into_index(
typed_chunk: TypedChunk, typed_chunk: TypedChunk,
index: &Index, index: &Index,
@ -126,6 +127,8 @@ pub(crate) fn write_typed_chunk_into_index(
let mut is_merged_database = false; let mut is_merged_database = false;
match typed_chunk { match typed_chunk {
TypedChunk::Documents(obkv_documents_iter) => { TypedChunk::Documents(obkv_documents_iter) => {
let span = tracing::trace_span!(target: "indexing::write_db", "documents");
let _entered = span.enter();
let mut operations: Vec<DocumentOperation> = Default::default(); let mut operations: Vec<DocumentOperation> = Default::default();
let mut docids = index.documents_ids(wtxn)?; let mut docids = index.documents_ids(wtxn)?;
@ -172,6 +175,9 @@ pub(crate) fn write_typed_chunk_into_index(
index.put_documents_ids(wtxn, &docids)?; index.put_documents_ids(wtxn, &docids)?;
} }
TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => { TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_word_count_docids");
let _entered = span.enter();
append_entries_into_database( append_entries_into_database(
fid_word_count_docids_iter, fid_word_count_docids_iter,
&index.field_id_word_count_docids, &index.field_id_word_count_docids,
@ -187,6 +193,8 @@ pub(crate) fn write_typed_chunk_into_index(
exact_word_docids_reader, exact_word_docids_reader,
word_fid_docids_reader, word_fid_docids_reader,
} => { } => {
let span = tracing::trace_span!(target: "indexing::write_db", "word_docids");
let _entered = span.enter();
let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?; let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
append_entries_into_database( append_entries_into_database(
word_docids_iter.clone(), word_docids_iter.clone(),
@ -230,6 +238,8 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true; is_merged_database = true;
} }
TypedChunk::WordPositionDocids(word_position_docids_iter) => { TypedChunk::WordPositionDocids(word_position_docids_iter) => {
let span = tracing::trace_span!(target: "indexing::write_db", "word_position_docids");
let _entered = span.enter();
append_entries_into_database( append_entries_into_database(
word_position_docids_iter, word_position_docids_iter,
&index.word_position_docids, &index.word_position_docids,
@ -241,16 +251,25 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true; is_merged_database = true;
} }
TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => { TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db","field_id_facet_number_docids");
let _entered = span.enter();
let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter); let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
indexer.execute(wtxn)?; indexer.execute(wtxn)?;
is_merged_database = true; is_merged_database = true;
} }
TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => { TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_string_docids");
let _entered = span.enter();
let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter); let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter);
indexer.execute(wtxn)?; indexer.execute(wtxn)?;
is_merged_database = true; is_merged_database = true;
} }
TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => { TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_exists_docids");
let _entered = span.enter();
append_entries_into_database( append_entries_into_database(
facet_id_exists_docids, facet_id_exists_docids,
&index.facet_id_exists_docids, &index.facet_id_exists_docids,
@ -262,6 +281,9 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true; is_merged_database = true;
} }
TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => { TypedChunk::FieldIdFacetIsNullDocids(facet_id_is_null_docids) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_null_docids");
let _entered = span.enter();
append_entries_into_database( append_entries_into_database(
facet_id_is_null_docids, facet_id_is_null_docids,
&index.facet_id_is_null_docids, &index.facet_id_is_null_docids,
@ -273,6 +295,8 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true; is_merged_database = true;
} }
TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => { TypedChunk::FieldIdFacetIsEmptyDocids(facet_id_is_empty_docids) => {
let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids");
let _entered = span.enter();
append_entries_into_database( append_entries_into_database(
facet_id_is_empty_docids, facet_id_is_empty_docids,
&index.facet_id_is_empty_docids, &index.facet_id_is_empty_docids,
@ -284,6 +308,9 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true; is_merged_database = true;
} }
TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => { TypedChunk::WordPairProximityDocids(word_pair_proximity_docids_iter) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "word_pair_proximity_docids");
let _entered = span.enter();
append_entries_into_database( append_entries_into_database(
word_pair_proximity_docids_iter, word_pair_proximity_docids_iter,
&index.word_pair_proximity_docids, &index.word_pair_proximity_docids,
@ -295,6 +322,9 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true; is_merged_database = true;
} }
TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => { TypedChunk::FieldIdDocidFacetNumbers(fid_docid_facet_number) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_numbers");
let _entered = span.enter();
let index_fid_docid_facet_numbers = let index_fid_docid_facet_numbers =
index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>(); index.field_id_docid_facet_f64s.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_number.into_cursor()?; let mut cursor = fid_docid_facet_number.into_cursor()?;
@ -315,6 +345,9 @@ pub(crate) fn write_typed_chunk_into_index(
} }
} }
TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => { TypedChunk::FieldIdDocidFacetStrings(fid_docid_facet_string) => {
let span =
tracing::trace_span!(target: "indexing::write_db", "field_id_docid_facet_strings");
let _entered = span.enter();
let index_fid_docid_facet_strings = let index_fid_docid_facet_strings =
index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>(); index.field_id_docid_facet_strings.remap_types::<Bytes, Bytes>();
let mut cursor = fid_docid_facet_string.into_cursor()?; let mut cursor = fid_docid_facet_string.into_cursor()?;
@ -335,6 +368,8 @@ pub(crate) fn write_typed_chunk_into_index(
} }
} }
TypedChunk::GeoPoints(geo_points) => { TypedChunk::GeoPoints(geo_points) => {
let span = tracing::trace_span!(target: "indexing::write_db", "geo_points");
let _entered = span.enter();
let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default(); let mut rtree = index.geo_rtree(wtxn)?.unwrap_or_default();
let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?; let mut geo_faceted_docids = index.geo_faceted_documents_ids(wtxn)?;
@ -365,6 +400,8 @@ pub(crate) fn write_typed_chunk_into_index(
expected_dimension, expected_dimension,
embedder_name, embedder_name,
} => { } => {
let span = tracing::trace_span!(target: "indexing::write_db", "vector_points");
let _entered = span.enter();
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
)?; )?;
@ -480,9 +517,11 @@ pub(crate) fn write_typed_chunk_into_index(
} }
} }
log::debug!("Finished vector chunk for {}", embedder_name); tracing::debug!("Finished vector chunk for {}", embedder_name);
} }
TypedChunk::ScriptLanguageDocids(sl_map) => { TypedChunk::ScriptLanguageDocids(sl_map) => {
let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");
let _entered = span.enter();
for (key, (deletion, addition)) in sl_map { for (key, (deletion, addition)) in sl_map {
let mut db_key_exists = false; let mut db_key_exists = false;
let final_value = match index.script_language_docids.get(wtxn, &key)? { let final_value = match index.script_language_docids.get(wtxn, &key)? {
@ -536,6 +575,7 @@ fn merge_word_docids_reader_into_fst(
/// Write provided entries in database using serialize_value function. /// Write provided entries in database using serialize_value function.
/// merge_values function is used if an entry already exist in the database. /// merge_values function is used if an entry already exist in the database.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn write_entries_into_database<R, K, V, FS, FM>( fn write_entries_into_database<R, K, V, FS, FM>(
data: grenad::Reader<R>, data: grenad::Reader<R>,
database: &heed::Database<K, V>, database: &heed::Database<K, V>,
@ -582,6 +622,7 @@ where
/// merge_values function is used if an entry already exist in the database. /// merge_values function is used if an entry already exist in the database.
/// All provided entries must be ordered. /// All provided entries must be ordered.
/// If the index is not empty, write_entries_into_database is called instead. /// If the index is not empty, write_entries_into_database is called instead.
#[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")]
fn append_entries_into_database<R, K, V, FS, FM>( fn append_entries_into_database<R, K, V, FS, FM>(
data: grenad::Reader<R>, data: grenad::Reader<R>,
database: &heed::Database<K, V>, database: &heed::Database<K, V>,

View File

@ -372,6 +372,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.embedder_settings = Setting::Reset; self.embedder_settings = Setting::Reset;
} }
#[tracing::instrument(
level = "trace"
skip(self, progress_callback, should_abort, old_fields_ids_map),
target = "indexing::documents"
)]
fn reindex<FP, FA>( fn reindex<FP, FA>(
&mut self, &mut self,
progress_callback: &FP, progress_callback: &FP,

View File

@ -39,7 +39,12 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
} }
} }
#[logging_timer::time("WordPrefixDocids::{}")] #[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "word_prefix_docids"
)]
pub fn execute( pub fn execute(
self, self,
mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>, mut new_word_docids_iter: grenad::ReaderCursor<CursorClonableMmap>,

View File

@ -4,7 +4,7 @@ use std::str;
use grenad::CompressionType; use grenad::CompressionType;
use heed::types::Bytes; use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Database}; use heed::{BytesDecode, BytesEncode, Database};
use log::debug; use tracing::debug;
use crate::error::SerializationError; use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec; use crate::heed_codec::StrBEU16Codec;
@ -44,7 +44,12 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
} }
} }
#[logging_timer::time("WordPrefixIntegerDocids::{}")] #[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "words_prefix_integer_docids"
)]
pub fn execute( pub fn execute(
self, self,
new_word_integer_docids: grenad::Reader<CursorClonableMmap>, new_word_integer_docids: grenad::Reader<CursorClonableMmap>,

View File

@ -38,7 +38,12 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
self self
} }
#[logging_timer::time("WordsPrefixesFst::{}")] #[tracing::instrument(
level = "trace",
skip_all,
target = "indexing::prefix",
name = "words_prefix_fst"
)]
pub fn execute(self) -> Result<()> { pub fn execute(self) -> Result<()> {
puffin::profile_function!(); puffin::profile_function!();

View File

@ -73,7 +73,7 @@ impl Embedder {
let device = match candle_core::Device::cuda_if_available(0) { let device = match candle_core::Device::cuda_if_available(0) {
Ok(device) => device, Ok(device) => device,
Err(error) => { Err(error) => {
log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error); tracing::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
candle_core::Device::Cpu candle_core::Device::Cpu
} }
}; };

View File

@ -173,12 +173,16 @@ impl Embedder {
let retry_duration = match result { let retry_duration = match result {
Ok(embeddings) => return Ok(embeddings), Ok(embeddings) => return Ok(embeddings),
Err(retry) => { Err(retry) => {
log::warn!("Failed: {}", retry.error); tracing::warn!("Failed: {}", retry.error);
tokenized |= retry.must_tokenize(); tokenized |= retry.must_tokenize();
retry.into_duration(attempt) retry.into_duration(attempt)
} }
}?; }?;
log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis()); tracing::warn!(
"Attempt #{}, retrying after {}ms.",
attempt,
retry_duration.as_millis()
);
tokio::time::sleep(retry_duration).await; tokio::time::sleep(retry_duration).await;
} }
@ -244,7 +248,7 @@ impl Embedder {
.map_err(EmbedError::openai_unexpected) .map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?; .map_err(Retry::retry_later)?;
log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt."); tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens( return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
error_response.error, error_response.error,
@ -266,7 +270,7 @@ impl Embedder {
client: &reqwest::Client, client: &reqwest::Client,
) -> Result<Vec<Embeddings<f32>>, Retry> { ) -> Result<Vec<Embeddings<f32>>, Retry> {
for text in texts { for text in texts {
log::trace!("Received prompt: {}", text.as_ref()) tracing::trace!("Received prompt: {}", text.as_ref())
} }
let request = OpenAiRequest { let request = OpenAiRequest {
model: self.options.embedding_model.name(), model: self.options.embedding_model.name(),
@ -289,7 +293,7 @@ impl Embedder {
.map_err(EmbedError::openai_unexpected) .map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?; .map_err(Retry::retry_later)?;
log::trace!("response: {:?}", response.data); tracing::trace!("response: {:?}", response.data);
Ok(response Ok(response
.data .data

23
tracing-trace/Cargo.toml Normal file
View File

@ -0,0 +1,23 @@
[package]
name = "tracing-trace"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
color-spantrace = "0.2.1"
fxprof-processed-profile = "0.6.0"
serde = { version = "1.0.195", features = ["derive"] }
serde_json = "1.0.111"
tracing = "0.1.40"
tracing-error = "0.2.0"
tracing-subscriber = "0.3.18"
byte-unit = { version = "4.0.19", default-features = false, features = [
"std",
"serde",
] }
tokio = { version = "1.35.1", features = ["sync"] }
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies]
libproc = "0.14.2"

View File

@ -0,0 +1,21 @@
use std::ffi::OsString;
use std::io::Write;
use serde_json::json;
fn main() {
let input_file = std::env::args_os().nth(1).expect("missing <INPUT> file");
let input =
std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open <INPUT>"));
let trace = tracing_trace::TraceReader::new(input);
let profile = tracing_trace::processor::span_stats::to_call_stats(trace).unwrap();
let mut output_file = OsString::new();
output_file.push("callstats-");
output_file.push(input_file);
let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap());
for (key, value) in profile {
serde_json::to_writer(&mut output_file, &json!({key: value})).unwrap();
writeln!(&mut output_file).unwrap();
}
output_file.flush().unwrap();
}

@@ -0,0 +1,18 @@
use std::ffi::OsString;
use std::io::Write;
fn main() {
let input_file = std::env::args_os().nth(1).expect("missing <INPUT> file");
let input =
std::io::BufReader::new(std::fs::File::open(&input_file).expect("could not open <INPUT>"));
let trace = tracing_trace::TraceReader::new(input);
let profile =
tracing_trace::processor::firefox_profiler::to_firefox_profile(trace, "Meilisearch")
.unwrap();
let mut output_file = OsString::new();
output_file.push("firefox-");
output_file.push(input_file);
let mut output_file = std::io::BufWriter::new(std::fs::File::create(output_file).unwrap());
serde_json::to_writer(&mut output_file, &profile).unwrap();
output_file.flush().unwrap();
}

tracing-trace/src/entry.rs

@@ -0,0 +1,141 @@
use std::borrow::Cow;
use serde::{Deserialize, Serialize};
use tracing::span::Id as TracingId;
#[derive(Debug, Serialize, Deserialize)]
pub enum Entry {
/// A code location was accessed for the first time
NewCallsite(NewCallsite),
/// A new thread was accessed
NewThread(NewThread),
/// A new call started
NewSpan(NewSpan),
/// An already in-flight call started doing work.
///
/// For synchronous functions, open should always be followed immediately by enter, exit and close,
/// but for asynchronous functions, work can suspend (exiting the span without closing it), and then
/// later resume (entering the span again without opening it).
///
/// The timer for a span only starts when the span is entered.
SpanEnter(SpanEnter),
/// An in-flight call suspended and paused work.
///
/// For synchronous functions, exit should always be followed immediately by close,
/// but for asynchronous functions, work can suspend and then later resume.
///
/// The timer for a span pauses when the span is exited.
SpanExit(SpanExit),
/// A call ended
SpanClose(SpanClose),
/// An event occurred
Event(Event),
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct SpanId(u64);
impl From<&TracingId> for SpanId {
fn from(value: &TracingId) -> Self {
Self(value.into_u64())
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NewCallsite {
pub call_id: ResourceId,
pub name: Cow<'static, str>,
pub module_path: Option<Cow<'static, str>>,
pub file: Option<Cow<'static, str>>,
pub line: Option<u32>,
pub target: Cow<'static, str>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct NewThread {
pub thread_id: ResourceId,
pub name: Option<String>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct SpanEnter {
pub id: SpanId,
pub time: std::time::Duration,
pub memory: Option<MemoryStats>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct SpanExit {
pub id: SpanId,
pub time: std::time::Duration,
pub memory: Option<MemoryStats>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct Event {
pub call_id: ResourceId,
pub thread_id: ResourceId,
pub parent_id: Option<SpanId>,
pub time: std::time::Duration,
pub memory: Option<MemoryStats>,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct NewSpan {
pub id: SpanId,
pub call_id: ResourceId,
pub parent_id: Option<SpanId>,
pub thread_id: ResourceId,
}
#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
pub struct SpanClose {
pub id: SpanId,
pub time: std::time::Duration,
}
/// A struct with a memory usage stat (currently the resident set size).
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
pub struct MemoryStats {
/// Resident set size, measured in bytes.
/// (same as VmRSS in /proc/<pid>/status).
pub resident: u64,
}
impl MemoryStats {
#[cfg(any(target_os = "linux", target_os = "macos"))]
pub fn fetch() -> Option<Self> {
use libproc::libproc::pid_rusage::{pidrusage, RUsageInfoV0};
match pidrusage(std::process::id() as i32) {
Ok(RUsageInfoV0 { ri_resident_size, .. }) => {
Some(MemoryStats { resident: ri_resident_size })
}
Err(_) => None, /* ignoring error to avoid spamming */
}
}
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
pub fn fetch() -> Option<Self> {
None
}
pub fn checked_sub(self, other: Self) -> Option<Self> {
Some(Self { resident: self.resident.checked_sub(other.resident)? })
}
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct ResourceId(pub(crate) usize);
impl ResourceId {
pub fn to_usize(self) -> usize {
self.0
}
}
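
For readers skimming the diff, here is a minimal sketch (not part of the PR) of the sequence these variants describe, assuming the `TraceLayer` from `layer.rs` below is installed as a `tracing` subscriber layer:

```rust
use tracing::trace_span;

fn main() {
    // A synchronous call produces: NewSpan -> SpanEnter -> SpanExit -> SpanClose.
    let span = trace_span!("sync_work");
    {
        let _guard = span.enter(); // SpanEnter: the span timer starts here
        // ... do the actual work ...
    } // _guard dropped: SpanExit, the timer pauses
    drop(span); // SpanClose: the call is over

    // An asynchronous call may emit several SpanEnter/SpanExit pairs between
    // NewSpan and SpanClose, one pair per poll of the future, so only the time
    // spent actually polling is attributed to the span.
}
```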

@@ -0,0 +1,21 @@
#[derive(Debug)]
pub enum Error {
Json(serde_json::Error),
}
impl std::error::Error for Error {}
impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("error de/serializing trace entry: ")?;
match self {
Error::Json(error) => std::fmt::Display::fmt(&error, f),
}
}
}
impl From<serde_json::Error> for Error {
fn from(value: serde_json::Error) -> Self {
Self::Json(value)
}
}

tracing-trace/src/layer.rs

@@ -0,0 +1,208 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::io::Write;
use std::ops::ControlFlow;
use std::sync::RwLock;
use tracing::span::{Attributes, Id as TracingId};
use tracing::{Metadata, Subscriber};
use tracing_subscriber::layer::Context;
use tracing_subscriber::Layer;
use crate::entry::{
Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter,
SpanExit, SpanId,
};
use crate::{Error, Trace, TraceWriter};
/// Layer that measures the time spent in spans.
pub struct TraceLayer {
sender: tokio::sync::mpsc::UnboundedSender<Entry>,
callsites: RwLock<HashMap<OpaqueIdentifier, ResourceId>>,
start_time: std::time::Instant,
profile_memory: bool,
}
impl Trace {
pub fn new(profile_memory: bool) -> (Self, TraceLayer) {
let (sender, receiver) = tokio::sync::mpsc::unbounded_channel();
let trace = Trace { receiver };
let layer = TraceLayer {
sender,
callsites: Default::default(),
start_time: std::time::Instant::now(),
profile_memory,
};
(trace, layer)
}
}
impl<W: Write> TraceWriter<W> {
pub fn new(writer: W, profile_memory: bool) -> (Self, TraceLayer) {
let (trace, layer) = Trace::new(profile_memory);
(trace.into_writer(writer), layer)
}
pub async fn receive(&mut self) -> Result<ControlFlow<(), ()>, Error> {
let Some(entry) = self.receiver.recv().await else {
return Ok(ControlFlow::Break(()));
};
self.write(entry)?;
Ok(ControlFlow::Continue(()))
}
/// Panics if called from an asynchronous context
pub fn blocking_receive(&mut self) -> Result<ControlFlow<(), ()>, Error> {
let Some(entry) = self.receiver.blocking_recv() else {
return Ok(ControlFlow::Break(()));
};
self.write(entry)?;
Ok(ControlFlow::Continue(()))
}
pub fn write(&mut self, entry: Entry) -> Result<(), Error> {
Ok(serde_json::ser::to_writer(&mut self.writer, &entry)?)
}
pub fn try_receive(&mut self) -> Result<ControlFlow<(), ()>, Error> {
let Ok(entry) = self.receiver.try_recv() else {
return Ok(ControlFlow::Break(()));
};
self.write(entry)?;
Ok(ControlFlow::Continue(()))
}
pub fn flush(&mut self) -> Result<(), std::io::Error> {
self.writer.flush()
}
}
#[derive(PartialEq, Eq, Hash)]
enum OpaqueIdentifier {
Thread(std::thread::ThreadId),
Call(tracing::callsite::Identifier),
}
impl TraceLayer {
fn resource_id(&self, opaque: OpaqueIdentifier) -> Option<ResourceId> {
self.callsites.read().unwrap().get(&opaque).copied()
}
fn register_resource_id(&self, opaque: OpaqueIdentifier) -> ResourceId {
let mut map = self.callsites.write().unwrap();
let len = map.len();
*map.entry(opaque).or_insert(ResourceId(len))
}
fn elapsed(&self) -> std::time::Duration {
self.start_time.elapsed()
}
fn memory_stats(&self) -> Option<MemoryStats> {
if self.profile_memory {
MemoryStats::fetch()
} else {
None
}
}
fn send(&self, entry: Entry) {
// we never care that the other end hung up on us
let _ = self.sender.send(entry);
}
fn register_callsite(&self, metadata: &'static Metadata<'static>) -> ResourceId {
let call_id = self.register_resource_id(OpaqueIdentifier::Call(metadata.callsite()));
let module_path = metadata.module_path();
let file = metadata.file();
let line = metadata.line();
let name = metadata.name();
let target = metadata.target();
self.send(Entry::NewCallsite(NewCallsite {
call_id,
module_path: module_path.map(Cow::Borrowed),
file: file.map(Cow::Borrowed),
line,
name: Cow::Borrowed(name),
target: Cow::Borrowed(target),
}));
call_id
}
fn register_thread(&self) -> ResourceId {
let thread_id = std::thread::current().id();
let name = std::thread::current().name().map(ToOwned::to_owned);
let thread_id = self.register_resource_id(OpaqueIdentifier::Thread(thread_id));
self.send(Entry::NewThread(NewThread { thread_id, name }));
thread_id
}
}
impl<S> Layer<S> for TraceLayer
where
S: Subscriber,
{
fn on_new_span(&self, attrs: &Attributes<'_>, id: &TracingId, _ctx: Context<'_, S>) {
let call_id = self
.resource_id(OpaqueIdentifier::Call(attrs.metadata().callsite()))
.unwrap_or_else(|| self.register_callsite(attrs.metadata()));
let thread_id = self
.resource_id(OpaqueIdentifier::Thread(std::thread::current().id()))
.unwrap_or_else(|| self.register_thread());
let parent_id = attrs
.parent()
.cloned()
.or_else(|| tracing::Span::current().id())
.map(|id| SpanId::from(&id));
self.send(Entry::NewSpan(NewSpan { id: id.into(), call_id, parent_id, thread_id }));
}
fn on_enter(&self, id: &TracingId, _ctx: Context<'_, S>) {
self.send(Entry::SpanEnter(SpanEnter {
id: id.into(),
time: self.elapsed(),
memory: self.memory_stats(),
}))
}
fn on_exit(&self, id: &TracingId, _ctx: Context<'_, S>) {
self.send(Entry::SpanExit(SpanExit {
id: id.into(),
time: self.elapsed(),
memory: self.memory_stats(),
}))
}
fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) {
let call_id = self
.resource_id(OpaqueIdentifier::Call(event.metadata().callsite()))
.unwrap_or_else(|| self.register_callsite(event.metadata()));
let thread_id = self
.resource_id(OpaqueIdentifier::Thread(std::thread::current().id()))
.unwrap_or_else(|| self.register_thread());
let parent_id = event
.parent()
.cloned()
.or_else(|| tracing::Span::current().id())
.map(|id| SpanId::from(&id));
self.send(Entry::Event(Event {
call_id,
thread_id,
parent_id,
time: self.elapsed(),
memory: self.memory_stats(),
}))
}
fn on_close(&self, id: TracingId, _ctx: Context<'_, S>) {
self.send(Entry::SpanClose(SpanClose { id: Into::into(&id), time: self.elapsed() }))
}
}

tracing-trace/src/lib.rs

@@ -0,0 +1,54 @@
use std::io::{Read, Write};
use entry::Entry;
pub mod entry;
mod error;
pub mod layer;
pub mod processor;
pub use error::Error;
pub struct TraceWriter<W: Write> {
writer: W,
receiver: tokio::sync::mpsc::UnboundedReceiver<Entry>,
}
pub struct Trace {
receiver: tokio::sync::mpsc::UnboundedReceiver<Entry>,
}
impl Trace {
pub fn into_receiver(self) -> tokio::sync::mpsc::UnboundedReceiver<Entry> {
self.receiver
}
pub fn into_writer<W: Write>(self, writer: W) -> TraceWriter<W> {
TraceWriter { writer, receiver: self.receiver }
}
}
pub struct TraceReader<R: Read> {
reader: R,
}
impl<R: Read> TraceReader<R> {
pub fn new(reader: R) -> Self {
Self { reader }
}
fn read(&mut self) -> Option<Result<Entry, Error>> {
serde_json::Deserializer::from_reader(&mut self.reader)
.into_iter()
.next()
.map(|res| res.map_err(Into::into))
}
}
impl<R: Read> Iterator for TraceReader<R> {
type Item = Result<Entry, Error>;
fn next(&mut self) -> Option<Self::Item> {
self.read()
}
}
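
Since `TraceReader` implements `Iterator`, replaying a dump written by `TraceWriter` is just a loop over `Result<Entry, Error>`. A minimal sketch (not part of the PR); `trace.json` is a placeholder path:

```rust
use std::fs::File;
use std::io::BufReader;

use tracing_trace::TraceReader;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Any stream of serialized entries works here.
    let input = BufReader::new(File::open("trace.json")?);
    // Each item is one Entry (NewCallsite, NewThread, NewSpan, SpanEnter, ...),
    // in the order it was emitted.
    for entry in TraceReader::new(input) {
        println!("{:?}", entry?);
    }
    Ok(())
}
```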

tracing-trace/src/main.rs

@@ -0,0 +1,122 @@
use tracing::{instrument, Span};
use tracing_error::{ErrorLayer, InstrumentResult, SpanTrace, TracedError};
#[instrument(level = "trace", target = "profile::indexing")]
fn foo() -> Result<(), TracedError<Error>> {
let _ = bar(40, 2);
bar(40, 2)
}
#[derive(Debug)]
pub enum Error {
XTooBig,
}
impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("x too big")
}
}
impl std::error::Error for Error {}
#[instrument(level = "trace", target = "profile::indexing")]
fn bar(x: u32, y: u32) -> Result<(), TracedError<Error>> {
let handle_ok = spawn_in_current_scope(move || baz(y));
let handle = spawn_in_current_scope(move || baz(x + y));
handle_ok.join().unwrap().and(handle.join().unwrap())
}
pub fn spawn_in_current_scope<F, T>(f: F) -> std::thread::JoinHandle<T>
where
F: FnOnce() -> T + Send + 'static,
T: Send + 'static,
{
let current = Span::current();
std::thread::spawn(move || {
let span = tracing::trace_span!(parent: &current, "thread_spawn", id = ?std::thread::current().id(), name = tracing::field::Empty);
if let Some(name) = std::thread::current().name() {
span.record("name", name);
}
span.in_scope(f)
})
}
#[instrument(level = "trace", target = "profile::indexing")]
fn baz(x: u32) -> Result<(), TracedError<Error>> {
if x > 10 {
fibo_recursive(10);
return Err(Error::XTooBig).in_current_span();
}
Ok(())
}
#[instrument(level = "trace", target = "profile::indexing")]
fn fibo_recursive(n: u32) -> u32 {
if n == 0 {
return 1;
}
if n == 1 {
return 2;
}
return fibo_recursive(n - 1) - fibo_recursive(n - 2);
}
use tracing_error::ExtractSpanTrace as _;
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_trace::processor;
fn on_panic(info: &std::panic::PanicInfo) {
let info = info.to_string();
let trace = SpanTrace::capture();
tracing::error!(%info, %trace);
}
fn main() {
let (mut trace, profiling_layer) =
tracing_trace::TraceWriter::new(std::fs::File::create("trace.json").unwrap(), true);
let subscriber = tracing_subscriber::registry()
// any number of other subscriber layers may be added before or
// after the `ErrorLayer`...
.with(ErrorLayer::default())
.with(profiling_layer);
// set the subscriber as the default for the application
tracing::subscriber::set_global_default(subscriber).unwrap();
std::panic::set_hook(Box::new(on_panic));
let res = foo();
if let Err(error) = res {
print_extracted_spantraces(&error)
}
while trace.try_receive().unwrap().is_continue() {}
trace.flush().unwrap();
let trace = tracing_trace::TraceReader::new(std::fs::File::open("trace.json").unwrap());
let profile = processor::firefox_profiler::to_firefox_profile(trace, "test").unwrap();
serde_json::to_writer(std::fs::File::create("processed.json").unwrap(), &profile).unwrap();
}
fn print_extracted_spantraces(error: &(dyn std::error::Error + 'static)) {
let mut error = Some(error);
let mut ind = 0;
eprintln!("Error:");
while let Some(err) = error {
if let Some(spantrace) = err.span_trace() {
eprintln!("found a spantrace:\n{}", color_spantrace::colorize(spantrace));
} else {
eprintln!("{:>4}: {}", ind, err);
}
error = err.source();
ind += 1;
}
}

@@ -0,0 +1,450 @@
use std::collections::HashMap;
use fxprof_processed_profile::{
CategoryPairHandle, CounterHandle, CpuDelta, Frame, FrameFlags, FrameInfo, MarkerDynamicField,
MarkerFieldFormat, MarkerLocation, MarkerSchema, MarkerSchemaField, ProcessHandle, Profile,
ProfilerMarker, ReferenceTimestamp, SamplingInterval, StringHandle, Timestamp,
};
use serde_json::json;
use crate::entry::{
Entry, Event, MemoryStats, NewCallsite, NewSpan, ResourceId, SpanClose, SpanEnter, SpanExit,
SpanId,
};
use crate::{Error, TraceReader};
pub fn to_firefox_profile<R: std::io::Read>(
trace: TraceReader<R>,
app: &str,
) -> Result<Profile, Error> {
let mut profile = Profile::new(
app,
ReferenceTimestamp::from_millis_since_unix_epoch(0.0),
SamplingInterval::from_nanos(15),
);
let mut last_timestamp = Timestamp::from_nanos_since_reference(0);
let main = profile.add_process(app, 0, last_timestamp);
let mut calls = HashMap::new();
let mut threads = HashMap::new();
let mut spans = HashMap::new();
let category = profile.add_category("general", fxprof_processed_profile::CategoryColor::Blue);
let subcategory = profile.add_subcategory(category, "subcategory");
let mut last_memory = MemoryStats::default();
let mut memory_counters = None;
for entry in trace {
let entry = entry?;
match entry {
Entry::NewCallsite(callsite) => {
let string_handle = profile.intern_string(callsite.name.as_ref());
calls.insert(callsite.call_id, (callsite, string_handle));
}
Entry::NewThread(thread) => {
let thread_handle = profile.add_thread(
main,
thread.thread_id.to_usize() as u32,
last_timestamp,
threads.is_empty(),
);
if let Some(name) = &thread.name {
profile.set_thread_name(thread_handle, name)
}
threads.insert(thread.thread_id, thread_handle);
}
Entry::NewSpan(span) => {
spans.insert(span.id, (span, SpanStatus::Outside));
}
Entry::SpanEnter(SpanEnter { id, time, memory }) => {
let (_span, status) = spans.get_mut(&id).unwrap();
let SpanStatus::Outside = status else {
continue;
};
*status = SpanStatus::Inside { time, memory };
last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64);
add_memory_samples(
&mut profile,
main,
memory,
last_timestamp,
&mut memory_counters,
&mut last_memory,
);
}
Entry::SpanExit(SpanExit { id, time, memory }) => {
let (span, status) = spans.get_mut(&id).unwrap();
let SpanStatus::Inside { time: begin, memory: begin_memory } = status else {
continue;
};
last_timestamp = Timestamp::from_nanos_since_reference(time.as_nanos() as u64);
let begin = *begin;
let begin_memory = *begin_memory;
*status = SpanStatus::Outside;
let span = *span;
let thread_handle = threads.get(&span.thread_id).unwrap();
let frames = make_frames(span, &spans, &calls, subcategory);
profile.add_sample(
*thread_handle,
to_timestamp(begin),
frames.iter().rev().cloned(),
CpuDelta::ZERO,
1,
);
profile.add_sample(
*thread_handle,
to_timestamp(time),
frames.iter().rev().cloned(),
CpuDelta::from_nanos((time - begin).as_nanos() as u64),
1,
);
add_memory_samples(
&mut profile,
main,
memory,
last_timestamp,
&mut memory_counters,
&mut last_memory,
);
let (callsite, _) = calls.get(&span.call_id).unwrap();
let memory_delta =
begin_memory.zip(memory).and_then(|(begin, end)| end.checked_sub(begin));
let marker = SpanMarker { callsite, span: &span, memory_delta };
profile.add_marker_with_stack(
*thread_handle,
&callsite.name,
marker,
fxprof_processed_profile::MarkerTiming::Interval(
to_timestamp(begin),
to_timestamp(time),
),
frames.iter().rev().cloned(),
)
}
Entry::Event(event) => {
let span = event
.parent_id
.as_ref()
.and_then(|parent_id| spans.get(parent_id))
.and_then(|(span, status)| match status {
SpanStatus::Outside => None,
SpanStatus::Inside { .. } => Some(span),
})
.copied();
let timestamp = to_timestamp(event.time);
let thread_handle = threads.get(&event.thread_id).unwrap();
let frames = span
.map(|span| make_frames(span, &spans, &calls, subcategory))
.unwrap_or_default();
profile.add_sample(
*thread_handle,
timestamp,
frames.iter().rev().cloned(),
CpuDelta::ZERO,
1,
);
let memory_delta = add_memory_samples(
&mut profile,
main,
event.memory,
last_timestamp,
&mut memory_counters,
&mut last_memory,
);
let (callsite, _) = calls.get(&event.call_id).unwrap();
let marker = EventMarker { callsite, event: &event, memory_delta };
profile.add_marker_with_stack(
*thread_handle,
&callsite.name,
marker,
fxprof_processed_profile::MarkerTiming::Instant(timestamp),
frames.iter().rev().cloned(),
);
last_timestamp = timestamp;
}
Entry::SpanClose(SpanClose { id, time }) => {
spans.remove(&id);
last_timestamp = to_timestamp(time);
}
}
}
Ok(profile)
}
struct MemoryCounterHandles {
usage: CounterHandle,
}
impl MemoryCounterHandles {
fn new(profile: &mut Profile, main: ProcessHandle) -> Self {
let usage =
profile.add_counter(main, "mimalloc", "Memory", "Amount of memory currently in use");
Self { usage }
}
}
fn add_memory_samples(
profile: &mut Profile,
main: ProcessHandle,
memory: Option<MemoryStats>,
last_timestamp: Timestamp,
memory_counters: &mut Option<MemoryCounterHandles>,
last_memory: &mut MemoryStats,
) -> Option<MemoryStats> {
let Some(stats) = memory else {
return None;
};
let memory_counters =
memory_counters.get_or_insert_with(|| MemoryCounterHandles::new(profile, main));
profile.add_counter_sample(
memory_counters.usage,
last_timestamp,
stats.resident as f64 - last_memory.resident as f64,
0,
);
let delta = stats.checked_sub(*last_memory);
*last_memory = stats;
delta
}
fn to_timestamp(time: std::time::Duration) -> Timestamp {
Timestamp::from_nanos_since_reference(time.as_nanos() as u64)
}
fn make_frames(
span: NewSpan,
spans: &HashMap<SpanId, (NewSpan, SpanStatus)>,
calls: &HashMap<ResourceId, (NewCallsite, StringHandle)>,
subcategory: CategoryPairHandle,
) -> Vec<FrameInfo> {
let mut frames = Vec::new();
let mut current_span = span;
loop {
let frame = make_frame(current_span, calls, subcategory);
frames.push(frame);
if let Some(parent) = current_span.parent_id {
current_span = spans.get(&parent).unwrap().0;
} else {
break;
}
}
frames
}
fn make_frame(
span: NewSpan,
calls: &HashMap<ResourceId, (NewCallsite, StringHandle)>,
subcategory: CategoryPairHandle,
) -> FrameInfo {
let (_, call) = calls.get(&span.call_id).unwrap();
FrameInfo { frame: Frame::Label(*call), category_pair: subcategory, flags: FrameFlags::empty() }
}
#[derive(Debug, Clone, Copy)]
enum SpanStatus {
Outside,
Inside { time: std::time::Duration, memory: Option<MemoryStats> },
}
struct SpanMarker<'a> {
span: &'a NewSpan,
callsite: &'a NewCallsite,
memory_delta: Option<MemoryStats>,
}
impl<'a> ProfilerMarker for SpanMarker<'a> {
const MARKER_TYPE_NAME: &'static str = "span";
fn schema() -> MarkerSchema {
let fields = vec![
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "filename",
label: "File name",
format: MarkerFieldFormat::FilePath,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "line",
label: "Line",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "module_path",
label: "Module path",
format: MarkerFieldFormat::String,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "span_id",
label: "Span ID",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "thread_id",
label: "Thread ID",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "resident",
label: "Resident set size, measured in bytes while this function was executing",
format: MarkerFieldFormat::Bytes,
searchable: false,
}),
];
MarkerSchema {
type_name: Self::MARKER_TYPE_NAME,
locations: vec![
MarkerLocation::MarkerTable,
MarkerLocation::MarkerChart,
MarkerLocation::TimelineOverview,
],
chart_label: None,
tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"),
table_label: Some("{marker.data.filename}:{marker.data.line}"),
fields,
}
}
fn json_marker_data(&self) -> serde_json::Value {
let filename = self.callsite.file.as_deref();
let line = self.callsite.line;
let module_path = self.callsite.module_path.as_deref();
let span_id = self.span.id;
let thread_id = self.span.thread_id;
let mut value = json!({
"type": Self::MARKER_TYPE_NAME,
"filename": filename,
"line": line,
"module_path": module_path,
"span_id": span_id,
"thread_id": thread_id,
});
if let Some(MemoryStats { resident }) = self.memory_delta {
value["resident"] = json!(resident);
}
value
}
}
struct EventMarker<'a> {
event: &'a Event,
callsite: &'a NewCallsite,
memory_delta: Option<MemoryStats>,
}
impl<'a> ProfilerMarker for EventMarker<'a> {
const MARKER_TYPE_NAME: &'static str = "tracing-event";
fn schema() -> MarkerSchema {
let fields = vec![
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "filename",
label: "File name",
format: MarkerFieldFormat::FilePath,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "line",
label: "Line",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "module_path",
label: "Module path",
format: MarkerFieldFormat::String,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "parent_span_id",
label: "Parent Span ID",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "thread_id",
label: "Thread ID",
format: MarkerFieldFormat::Integer,
searchable: true,
}),
MarkerSchemaField::Dynamic(MarkerDynamicField {
key: "resident",
label: "Resident set size, measured in bytes while this function was executing",
format: MarkerFieldFormat::Bytes,
searchable: false,
}),
];
MarkerSchema {
type_name: Self::MARKER_TYPE_NAME,
locations: vec![
MarkerLocation::MarkerTable,
MarkerLocation::MarkerChart,
MarkerLocation::TimelineOverview,
],
chart_label: None,
tooltip_label: Some("{marker.name} - {marker.data.filename}:{marker.data.line}"),
table_label: Some("{marker.data.filename}:{marker.data.line}"),
fields,
}
}
fn json_marker_data(&self) -> serde_json::Value {
let filename = self.callsite.file.as_deref();
let line = self.callsite.line;
let module_path = self.callsite.module_path.as_deref();
let span_id = self.event.parent_id;
let thread_id = self.event.thread_id;
let mut value = json!({
"type": Self::MARKER_TYPE_NAME,
"filename": filename,
"line": line,
"module_path": module_path,
"parent_span_id": span_id,
"thread_id": thread_id,
});
if let Some(MemoryStats { resident }) = self.memory_delta {
value["resident"] = json!(resident);
}
value
}
}

@@ -0,0 +1,195 @@
use std::collections::HashMap;
use std::io::Read;
use crate::entry::{
Entry, Event, MemoryStats, NewCallsite, NewSpan, NewThread, ResourceId, SpanClose, SpanEnter,
SpanExit, SpanId,
};
use crate::{Error, TraceReader};
#[derive(Debug, Clone, Copy)]
enum SpanStatus {
Outside,
Inside(std::time::Duration),
}
pub fn print_trace<R: Read>(trace: TraceReader<R>) -> Result<(), Error> {
let mut calls = HashMap::new();
let mut threads = HashMap::new();
let mut spans = HashMap::new();
for entry in trace {
let entry = entry?;
match entry {
Entry::NewCallsite(callsite) => {
calls.insert(callsite.call_id, callsite);
}
Entry::NewThread(NewThread { thread_id, name }) => {
threads.insert(thread_id, name);
}
Entry::NewSpan(span) => {
spans.insert(span.id, (span, SpanStatus::Outside));
}
Entry::SpanEnter(SpanEnter { id, time, memory }) => {
let (span, status) = spans.get_mut(&id).unwrap();
let SpanStatus::Outside = status else {
continue;
};
*status = SpanStatus::Inside(time);
let span = *span;
match memory {
Some(stats) => println!(
"[{}]{}::{} ({}) <-",
print_thread(&threads, span.thread_id),
print_backtrace(&spans, &calls, &span),
print_span(&calls, &span),
print_memory(stats),
),
None => println!(
"[{}]{}::{} <-",
print_thread(&threads, span.thread_id),
print_backtrace(&spans, &calls, &span),
print_span(&calls, &span),
),
}
}
Entry::SpanExit(SpanExit { id, time, memory }) => {
let (span, status) = spans.get_mut(&id).unwrap();
let SpanStatus::Inside(begin) = status else {
continue;
};
let begin = *begin;
*status = SpanStatus::Outside;
let span = *span;
match memory {
Some(stats) => println!(
"[{}]{}::{} ({}) -> {}",
print_thread(&threads, span.thread_id),
print_backtrace(&spans, &calls, &span),
print_span(&calls, &span),
print_memory(stats),
print_duration(time - begin),
),
None => println!(
"[{}]{}::{} -> {}",
print_thread(&threads, span.thread_id),
print_backtrace(&spans, &calls, &span),
print_span(&calls, &span),
print_duration(time - begin),
),
}
}
Entry::SpanClose(SpanClose { id, time: _ }) => {
spans.remove(&id);
}
Entry::Event(Event { call_id, thread_id, parent_id, time: _, memory }) => {
let parent_span = parent_id.and_then(|parent_id| spans.get(&parent_id)).and_then(
|(span, status)| match status {
SpanStatus::Outside => None,
SpanStatus::Inside(_) => Some(span),
},
);
match (parent_span, memory) {
(Some(parent_span), Some(stats)) => println!(
"[{}]{}::{} ({}) event: {}",
print_thread(&threads, thread_id),
print_backtrace(&spans, &calls, parent_span),
print_span(&calls, parent_span),
print_memory(stats),
print_call(&calls, call_id)
),
(Some(parent_span), None) => println!(
"[{}]{}::{} event: {}",
print_thread(&threads, thread_id),
print_backtrace(&spans, &calls, parent_span),
print_span(&calls, parent_span),
print_call(&calls, call_id)
),
(None, None) => println!(
"[{}] event: {}",
print_thread(&threads, thread_id),
print_call(&calls, call_id)
),
(None, Some(stats)) => println!(
"[{}] ({}) event: {}",
print_thread(&threads, thread_id),
print_memory(stats),
print_call(&calls, call_id)
),
}
}
}
}
Ok(())
}
fn print_thread(threads: &HashMap<ResourceId, Option<String>>, thread_id: ResourceId) -> String {
let thread = threads.get(&thread_id).unwrap();
let thread =
thread.as_ref().cloned().unwrap_or_else(|| format!("ThreadId({})", thread_id.to_usize()));
thread
}
fn print_backtrace(
spans: &HashMap<SpanId, (NewSpan, SpanStatus)>,
calls: &HashMap<ResourceId, NewCallsite>,
span: &NewSpan,
) -> String {
let mut parents = Vec::new();
let mut current = span.parent_id;
while let Some(current_id) = &current {
let (span, _) = spans.get(current_id).unwrap();
let callsite = calls.get(&span.call_id).unwrap();
parents.push(callsite.name.clone());
current = span.parent_id;
}
let x: Vec<String> = parents.into_iter().rev().map(|x| x.to_string()).collect();
x.join("::")
}
fn print_span(calls: &HashMap<ResourceId, NewCallsite>, span: &NewSpan) -> String {
print_call(calls, span.call_id)
}
fn print_call(calls: &HashMap<ResourceId, NewCallsite>, call_id: ResourceId) -> String {
let callsite = calls.get(&call_id).unwrap();
match (callsite.file.clone(), callsite.line) {
(Some(file), None) => format!("{} ({})", callsite.name, file),
(Some(file), Some(line)) => format!("{} ({}:{})", callsite.name, file, line),
_ => callsite.name.to_string(),
}
}
fn print_duration(duration: std::time::Duration) -> String {
if duration.as_nanos() < 1000 {
format!("{}ns", duration.as_nanos())
} else if duration.as_micros() < 1000 {
format!("{}μs", duration.as_micros())
} else if duration.as_millis() < 1000 {
format!("{}ms", duration.as_millis())
} else if duration.as_secs() < 120 {
format!("{}s", duration.as_secs())
} else if duration.as_secs_f64() / 60.0 < 60.0 {
format!("{}min", duration.as_secs_f64() / 60.0)
} else if duration.as_secs_f64() / 3600.0 < 8.0 {
format!("{}h", duration.as_secs_f64() / 3600.0)
} else {
format!("{}d", duration.as_secs_f64() / 3600.0 / 24.0)
}
}
/// Format the resident set size in the most appropriate unit (GiB, MiB, KiB, bytes).
fn print_memory(MemoryStats { resident }: MemoryStats) -> String {
use byte_unit::Byte;
let rss_bytes = Byte::from_bytes(resident).get_appropriate_unit(true);
format!("RSS {rss_bytes:.2}")
}
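
For completeness, a minimal sketch (not from the PR) of running this processor on a dump; `trace.json` is a placeholder path:

```rust
use std::fs::File;
use std::io::BufReader;

use tracing_trace::processor::fmt::print_trace;
use tracing_trace::TraceReader;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Read the serialized entries back and pretty-print them: one line per
    // span enter/exit and per event, with thread, backtrace, optional RSS,
    // and duration.
    let trace = TraceReader::new(BufReader::new(File::open("trace.json")?));
    print_trace(trace)?;
    Ok(())
}
```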

@@ -0,0 +1,3 @@
pub mod firefox_profiler;
pub mod fmt;
pub mod span_stats;

@@ -0,0 +1,79 @@
use std::collections::{BTreeMap, HashMap};
use std::time::Duration;
use serde::{Deserialize, Serialize};
use crate::entry::{Entry, NewCallsite, SpanClose, SpanEnter, SpanExit};
use crate::{Error, TraceReader};
#[derive(Debug, Clone, Copy)]
enum SpanStatus {
Outside,
Inside(std::time::Duration),
}
#[derive(Serialize, Deserialize)]
pub struct CallStats {
pub call_count: usize,
pub time: u64,
}
pub fn to_call_stats<R: std::io::Read>(
trace: TraceReader<R>,
) -> Result<BTreeMap<String, CallStats>, Error> {
let mut calls = HashMap::new();
let mut spans = HashMap::new();
for entry in trace {
let entry = entry?;
match entry {
Entry::NewCallsite(callsite) => {
calls.insert(callsite.call_id, (callsite, vec![]));
}
Entry::NewThread(_) => {}
Entry::NewSpan(span) => {
spans.insert(span.id, (span, SpanStatus::Outside));
}
Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => {
let (_, status) = spans.get_mut(&id).unwrap();
let SpanStatus::Outside = status else {
continue;
};
*status = SpanStatus::Inside(time);
}
Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => {
let (span, status) = spans.get_mut(&id).unwrap();
let SpanStatus::Inside(begin) = status else {
continue;
};
let begin = *begin;
*status = SpanStatus::Outside;
let span = *span;
let (_, call_list) = calls.get_mut(&span.call_id).unwrap();
call_list.push(end - begin);
}
Entry::SpanClose(SpanClose { id, time: _ }) => {
spans.remove(&id);
}
Entry::Event(_) => {}
}
}
Ok(calls
.into_iter()
.map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls)))
.collect())
}
fn site_to_string(call_site: NewCallsite) -> String {
format!("{}::{}", call_site.target, call_site.name)
}
fn calls_to_stats(calls: Vec<Duration>) -> CallStats {
let nb = calls.len();
let sum: Duration = calls.iter().sum();
CallStats { call_count: nb, time: sum.as_nanos() as u64 }
}