mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-22 18:17:39 +08:00
Merge branch 'main' into tmp-release-v1.3.0
This commit is contained in:
commit
b45c36cd71
14
.github/workflows/test-suite.yml
vendored
14
.github/workflows/test-suite.yml
vendored
@ -30,20 +30,20 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
apt-get update && apt-get install -y curl
|
apt-get update && apt-get install -y curl
|
||||||
apt-get install build-essential -y
|
apt-get install build-essential -y
|
||||||
- name: Run test with Rust stable
|
- name: Setup test with Rust stable
|
||||||
if: github.event_name != 'schedule'
|
if: github.event_name != 'schedule'
|
||||||
uses: actions-rs/toolchain@v1
|
uses: actions-rs/toolchain@v1
|
||||||
with:
|
with:
|
||||||
toolchain: stable
|
toolchain: stable
|
||||||
override: true
|
override: true
|
||||||
- name: Run test with Rust nightly
|
- name: Setup test with Rust nightly
|
||||||
if: github.event_name == 'schedule'
|
if: github.event_name == 'schedule'
|
||||||
uses: actions-rs/toolchain@v1
|
uses: actions-rs/toolchain@v1
|
||||||
with:
|
with:
|
||||||
toolchain: nightly
|
toolchain: nightly
|
||||||
override: true
|
override: true
|
||||||
- name: Cache dependencies
|
- name: Cache dependencies
|
||||||
uses: Swatinem/rust-cache@v2.4.0
|
uses: Swatinem/rust-cache@v2.5.0
|
||||||
- name: Run cargo check without any default features
|
- name: Run cargo check without any default features
|
||||||
uses: actions-rs/cargo@v1
|
uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
@ -65,7 +65,7 @@ jobs:
|
|||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
- name: Cache dependencies
|
- name: Cache dependencies
|
||||||
uses: Swatinem/rust-cache@v2.4.0
|
uses: Swatinem/rust-cache@v2.5.0
|
||||||
- name: Run cargo check without any default features
|
- name: Run cargo check without any default features
|
||||||
uses: actions-rs/cargo@v1
|
uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
@ -146,7 +146,7 @@ jobs:
|
|||||||
toolchain: stable
|
toolchain: stable
|
||||||
override: true
|
override: true
|
||||||
- name: Cache dependencies
|
- name: Cache dependencies
|
||||||
uses: Swatinem/rust-cache@v2.4.0
|
uses: Swatinem/rust-cache@v2.5.0
|
||||||
- name: Run tests in debug
|
- name: Run tests in debug
|
||||||
uses: actions-rs/cargo@v1
|
uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
@ -165,7 +165,7 @@ jobs:
|
|||||||
override: true
|
override: true
|
||||||
components: clippy
|
components: clippy
|
||||||
- name: Cache dependencies
|
- name: Cache dependencies
|
||||||
uses: Swatinem/rust-cache@v2.4.0
|
uses: Swatinem/rust-cache@v2.5.0
|
||||||
- name: Run cargo clippy
|
- name: Run cargo clippy
|
||||||
uses: actions-rs/cargo@v1
|
uses: actions-rs/cargo@v1
|
||||||
with:
|
with:
|
||||||
@ -184,7 +184,7 @@ jobs:
|
|||||||
override: true
|
override: true
|
||||||
components: rustfmt
|
components: rustfmt
|
||||||
- name: Cache dependencies
|
- name: Cache dependencies
|
||||||
uses: Swatinem/rust-cache@v2.4.0
|
uses: Swatinem/rust-cache@v2.5.0
|
||||||
- name: Run cargo fmt
|
- name: Run cargo fmt
|
||||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||||
|
115
Cargo.lock
generated
115
Cargo.lock
generated
@ -405,7 +405,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -416,7 +416,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -603,7 +603,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -794,7 +794,7 @@ dependencies = [
|
|||||||
"heck",
|
"heck",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -1336,7 +1336,7 @@ checksum = "eecf8589574ce9b895052fa12d69af7a233f99e6107f5cb8dd1044f2a17bfdcb"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -1537,7 +1537,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -1912,6 +1912,7 @@ dependencies = [
|
|||||||
"meilisearch-types",
|
"meilisearch-types",
|
||||||
"nelson",
|
"nelson",
|
||||||
"page_size 0.5.0",
|
"page_size 0.5.0",
|
||||||
|
"puffin",
|
||||||
"roaring",
|
"roaring",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
@ -2022,12 +2023,12 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "is-terminal"
|
name = "is-terminal"
|
||||||
version = "0.4.8"
|
version = "0.4.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "24fddda5af7e54bf7da53067d6e802dbcc381d0a8eef629df528e3ebf68755cb"
|
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"hermit-abi 0.3.1",
|
"hermit-abi 0.3.1",
|
||||||
"rustix 0.38.2",
|
"rustix 0.38.4",
|
||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -2397,9 +2398,9 @@ checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "linux-raw-sys"
|
name = "linux-raw-sys"
|
||||||
version = "0.4.3"
|
version = "0.4.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0"
|
checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lmdb-rkv-sys"
|
name = "lmdb-rkv-sys"
|
||||||
@ -2467,6 +2468,12 @@ dependencies = [
|
|||||||
"syn 1.0.109",
|
"syn 1.0.109",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "lz4_flex"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "manifest-dir-macros"
|
name = "manifest-dir-macros"
|
||||||
version = "0.1.17"
|
version = "0.1.17"
|
||||||
@ -2476,7 +2483,7 @@ dependencies = [
|
|||||||
"once_cell",
|
"once_cell",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -2556,6 +2563,8 @@ dependencies = [
|
|||||||
"pin-project-lite",
|
"pin-project-lite",
|
||||||
"platform-dirs",
|
"platform-dirs",
|
||||||
"prometheus",
|
"prometheus",
|
||||||
|
"puffin",
|
||||||
|
"puffin_http",
|
||||||
"rand",
|
"rand",
|
||||||
"rayon",
|
"rayon",
|
||||||
"regex",
|
"regex",
|
||||||
@ -2700,6 +2709,7 @@ dependencies = [
|
|||||||
"obkv",
|
"obkv",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"ordered-float",
|
"ordered-float",
|
||||||
|
"puffin",
|
||||||
"rand",
|
"rand",
|
||||||
"rand_pcg",
|
"rand_pcg",
|
||||||
"rayon",
|
"rayon",
|
||||||
@ -3030,7 +3040,7 @@ dependencies = [
|
|||||||
"pest_meta",
|
"pest_meta",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -3175,9 +3185,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.66"
|
version = "1.0.64"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9"
|
checksum = "78803b62cbf1f46fde80d7c0e803111524b9877184cfe7c3033659490ac7a7da"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
@ -3219,10 +3229,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
|
checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "quote"
|
name = "puffin"
|
||||||
version = "1.0.31"
|
version = "0.16.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0"
|
checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"bincode",
|
||||||
|
"byteorder",
|
||||||
|
"cfg-if",
|
||||||
|
"instant",
|
||||||
|
"lz4_flex",
|
||||||
|
"once_cell",
|
||||||
|
"parking_lot",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "puffin_http"
|
||||||
|
version = "0.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "13bffc600c35913d282ae1e96a6ffcdf36dc7a7cdb9310e0ba15914d258c8193"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"crossbeam-channel",
|
||||||
|
"log",
|
||||||
|
"puffin",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.30"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5907a1b7c277254a8b15170f6e7c97cfa60ee7872a3217663bb81151e48184bb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
]
|
]
|
||||||
@ -3470,14 +3509,14 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustix"
|
name = "rustix"
|
||||||
version = "0.38.2"
|
version = "0.38.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "aabcb0461ebd01d6b79945797c27f8529082226cb630a9865a71870ff63532a4"
|
checksum = "0a962918ea88d644592894bc6dc55acc6c0956488adcebbfb6e273506b7fd6e5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bitflags 2.3.3",
|
"bitflags 2.3.3",
|
||||||
"errno",
|
"errno",
|
||||||
"libc",
|
"libc",
|
||||||
"linux-raw-sys 0.4.3",
|
"linux-raw-sys 0.4.5",
|
||||||
"windows-sys 0.48.0",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -3583,9 +3622,9 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde"
|
name = "serde"
|
||||||
version = "1.0.171"
|
version = "1.0.180"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9"
|
checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde_derive",
|
"serde_derive",
|
||||||
]
|
]
|
||||||
@ -3610,20 +3649,20 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_derive"
|
name = "serde_derive"
|
||||||
version = "1.0.171"
|
version = "1.0.180"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682"
|
checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "serde_json"
|
name = "serde_json"
|
||||||
version = "1.0.103"
|
version = "1.0.104"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b"
|
checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"indexmap 2.0.0",
|
"indexmap 2.0.0",
|
||||||
"itoa",
|
"itoa",
|
||||||
@ -3833,9 +3872,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "2.0.26"
|
version = "2.0.28"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970"
|
checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@ -3922,22 +3961,22 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror"
|
name = "thiserror"
|
||||||
version = "1.0.43"
|
version = "1.0.44"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42"
|
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"thiserror-impl",
|
"thiserror-impl",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "thiserror-impl"
|
name = "thiserror-impl"
|
||||||
version = "1.0.43"
|
version = "1.0.44"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f"
|
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -4019,7 +4058,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -4344,7 +4383,7 @@ dependencies = [
|
|||||||
"once_cell",
|
"once_cell",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -4378,7 +4417,7 @@ checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
|
|||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.26",
|
"syn 2.0.28",
|
||||||
"wasm-bindgen-backend",
|
"wasm-bindgen-backend",
|
||||||
"wasm-bindgen-shared",
|
"wasm-bindgen-shared",
|
||||||
]
|
]
|
||||||
|
19
PROFILING.md
Normal file
19
PROFILING.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Profiling Meilisearch
|
||||||
|
|
||||||
|
Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
|
||||||
|
|
||||||
|
![An example profiling with Puffin viewer](assets/profiling-example.png)
|
||||||
|
|
||||||
|
## Profiling the Indexing Process
|
||||||
|
|
||||||
|
When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
|
||||||
|
|
||||||
|
Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
|
||||||
|
|
||||||
|
Another piece of advice on the Puffin viewer UI interface is to consider the _Merge children with same ID_ option. It can hide the exact actual timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.
|
||||||
|
|
||||||
|
## Profiling the Search Process
|
||||||
|
|
||||||
|
We still need to take the time to profile the search side of the engine with Puffin. It would require time to profile the filtering phase, query parsing, creation, and execution. We could even profile the Actix HTTP server.
|
||||||
|
|
||||||
|
The only issue we see is the framing system. Puffin requires a global frame-based profiling phase, which collides with Meilisearch's ability to accept and answer multiple requests on different threads simultaneously.
|
BIN
assets/profiling-example.png
Normal file
BIN
assets/profiling-example.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.2 MiB |
@ -14,7 +14,7 @@ license.workspace = true
|
|||||||
anyhow = "1.0.70"
|
anyhow = "1.0.70"
|
||||||
csv = "1.2.1"
|
csv = "1.2.1"
|
||||||
milli = { path = "../milli" }
|
milli = { path = "../milli" }
|
||||||
mimalloc = { version = "0.1.36", default-features = false }
|
mimalloc = { version = "0.1.37", default-features = false }
|
||||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
|
@ -22,6 +22,7 @@ log = "0.4.17"
|
|||||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
page_size = "0.5.0"
|
page_size = "0.5.0"
|
||||||
|
puffin = "0.16.0"
|
||||||
roaring = { version = "0.10.1", features = ["serde"] }
|
roaring = { version = "0.10.1", features = ["serde"] }
|
||||||
serde = { version = "1.0.160", features = ["derive"] }
|
serde = { version = "1.0.160", features = ["derive"] }
|
||||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||||
|
@ -471,6 +471,8 @@ impl IndexScheduler {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
|
self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?;
|
||||||
|
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
|
let enqueued = &self.get_status(rtxn, Status::Enqueued)?;
|
||||||
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
|
let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued;
|
||||||
|
|
||||||
@ -575,6 +577,9 @@ impl IndexScheduler {
|
|||||||
self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?;
|
self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?;
|
||||||
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
|
self.breakpoint(crate::Breakpoint::InsideProcessBatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
puffin::profile_function!(format!("{:?}", batch));
|
||||||
|
|
||||||
match batch {
|
match batch {
|
||||||
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
|
Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
|
||||||
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
// 1. Retrieve the tasks that matched the query at enqueue-time.
|
||||||
@ -1111,6 +1116,8 @@ impl IndexScheduler {
|
|||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
operation: IndexOperation,
|
operation: IndexOperation,
|
||||||
) -> Result<Vec<Task>> {
|
) -> Result<Vec<Task>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
match operation {
|
match operation {
|
||||||
IndexOperation::DocumentClear { mut tasks, .. } => {
|
IndexOperation::DocumentClear { mut tasks, .. } => {
|
||||||
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
|
let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
|
||||||
|
@ -1053,6 +1053,8 @@ impl IndexScheduler {
|
|||||||
self.breakpoint(Breakpoint::Start);
|
self.breakpoint(Breakpoint::Start);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
puffin::GlobalProfiler::lock().new_frame();
|
||||||
|
|
||||||
self.cleanup_task_queue()?;
|
self.cleanup_task_queue()?;
|
||||||
|
|
||||||
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
|
||||||
|
@ -58,7 +58,7 @@ lazy_static = "1.4.0"
|
|||||||
log = "0.4.17"
|
log = "0.4.17"
|
||||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||||
meilisearch-types = { path = "../meilisearch-types" }
|
meilisearch-types = { path = "../meilisearch-types" }
|
||||||
mimalloc = { version = "0.1.36", default-features = false }
|
mimalloc = { version = "0.1.37", default-features = false }
|
||||||
mime = "0.3.17"
|
mime = "0.3.17"
|
||||||
num_cpus = "1.15.0"
|
num_cpus = "1.15.0"
|
||||||
obkv = "0.2.0"
|
obkv = "0.2.0"
|
||||||
@ -69,6 +69,8 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
|
|||||||
pin-project-lite = "0.2.9"
|
pin-project-lite = "0.2.9"
|
||||||
platform-dirs = "0.3.0"
|
platform-dirs = "0.3.0"
|
||||||
prometheus = { version = "0.13.3", features = ["process"] }
|
prometheus = { version = "0.13.3", features = ["process"] }
|
||||||
|
puffin = "0.16.0"
|
||||||
|
puffin_http = { version = "0.13.0", optional = true }
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
rayon = "1.7.0"
|
rayon = "1.7.0"
|
||||||
regex = "1.7.3"
|
regex = "1.7.3"
|
||||||
@ -133,7 +135,18 @@ zip = { version = "0.6.4", optional = true }
|
|||||||
[features]
|
[features]
|
||||||
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
|
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
|
||||||
analytics = ["segment"]
|
analytics = ["segment"]
|
||||||
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
|
profile-with-puffin = ["dep:puffin_http"]
|
||||||
|
mini-dashboard = [
|
||||||
|
"actix-web-static-files",
|
||||||
|
"static-files",
|
||||||
|
"anyhow",
|
||||||
|
"cargo_toml",
|
||||||
|
"hex",
|
||||||
|
"reqwest",
|
||||||
|
"sha-1",
|
||||||
|
"tempfile",
|
||||||
|
"zip",
|
||||||
|
]
|
||||||
chinese = ["meilisearch-types/chinese"]
|
chinese = ["meilisearch-types/chinese"]
|
||||||
hebrew = ["meilisearch-types/hebrew"]
|
hebrew = ["meilisearch-types/hebrew"]
|
||||||
japanese = ["meilisearch-types/japanese"]
|
japanese = ["meilisearch-types/japanese"]
|
||||||
|
@ -30,6 +30,10 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
|
|||||||
async fn main() -> anyhow::Result<()> {
|
async fn main() -> anyhow::Result<()> {
|
||||||
let (opt, config_read_from) = Opt::try_build()?;
|
let (opt, config_read_from) = Opt::try_build()?;
|
||||||
|
|
||||||
|
#[cfg(feature = "profile-with-puffin")]
|
||||||
|
let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
|
||||||
|
puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
|
||||||
|
|
||||||
anyhow::ensure!(
|
anyhow::ensure!(
|
||||||
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
|
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
|
||||||
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
|
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
|
||||||
|
@ -65,13 +65,16 @@ filter-parser = { path = "../filter-parser" }
|
|||||||
# documents words self-join
|
# documents words self-join
|
||||||
itertools = "0.10.5"
|
itertools = "0.10.5"
|
||||||
|
|
||||||
|
# profiling
|
||||||
|
puffin = "0.16.0"
|
||||||
|
|
||||||
# logging
|
# logging
|
||||||
log = "0.4.17"
|
log = "0.4.17"
|
||||||
logging_timer = "1.1.0"
|
logging_timer = "1.1.0"
|
||||||
csv = "1.2.1"
|
csv = "1.2.1"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
mimalloc = { version = "0.1.29", default-features = false }
|
mimalloc = { version = "0.1.37", default-features = false }
|
||||||
big_s = "1.0.2"
|
big_s = "1.0.2"
|
||||||
insta = "1.29.0"
|
insta = "1.29.0"
|
||||||
maplit = "1.0.2"
|
maplit = "1.0.2"
|
||||||
|
@ -15,6 +15,8 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self) -> Result<u64> {
|
pub fn execute(self) -> Result<u64> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||||
let Index {
|
let Index {
|
||||||
env: _env,
|
env: _env,
|
||||||
|
@ -109,6 +109,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
Some(docid)
|
Some(docid)
|
||||||
}
|
}
|
||||||
pub fn execute(self) -> Result<DocumentDeletionResult> {
|
pub fn execute(self) -> Result<DocumentDeletionResult> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
|
let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
|
||||||
self.execute_inner()?;
|
self.execute_inner()?;
|
||||||
|
|
||||||
|
@ -31,6 +31,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
|
|||||||
autogenerate_docids: bool,
|
autogenerate_docids: bool,
|
||||||
reader: DocumentsBatchReader<R>,
|
reader: DocumentsBatchReader<R>,
|
||||||
) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
|
) -> Result<StdResult<EnrichedDocumentsBatchReader<R>, UserError>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
|
let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();
|
||||||
|
|
||||||
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
|
let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
|
||||||
|
@ -30,6 +30,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
stop_words: Option<&fst::Set<&[u8]>>,
|
stop_words: Option<&fst::Set<&[u8]>>,
|
||||||
max_positions_per_attributes: Option<u32>,
|
max_positions_per_attributes: Option<u32>,
|
||||||
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
|
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_positions_per_attributes = max_positions_per_attributes
|
let max_positions_per_attributes = max_positions_per_attributes
|
||||||
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
|
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
@ -20,6 +20,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
|||||||
docid_fid_facet_number: grenad::Reader<R>,
|
docid_fid_facet_number: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
) -> Result<grenad::Reader<File>> {
|
) -> Result<grenad::Reader<File>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut facet_number_docids_sorter = create_sorter(
|
let mut facet_number_docids_sorter = create_sorter(
|
||||||
|
@ -18,6 +18,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||||||
docid_fid_facet_string: grenad::Reader<R>,
|
docid_fid_facet_string: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
) -> Result<grenad::Reader<File>> {
|
) -> Result<grenad::Reader<File>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut facet_string_docids_sorter = create_sorter(
|
let mut facet_string_docids_sorter = create_sorter(
|
||||||
|
@ -34,6 +34,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
faceted_fields: &HashSet<FieldId>,
|
faceted_fields: &HashSet<FieldId>,
|
||||||
) -> Result<ExtractedFacetValues> {
|
) -> Result<ExtractedFacetValues> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut fid_docid_facet_numbers_sorter = create_sorter(
|
let mut fid_docid_facet_numbers_sorter = create_sorter(
|
||||||
|
@ -22,6 +22,8 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
|||||||
docid_word_positions: grenad::Reader<R>,
|
docid_word_positions: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
) -> Result<grenad::Reader<File>> {
|
) -> Result<grenad::Reader<File>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut fid_word_count_docids_sorter = create_sorter(
|
let mut fid_word_count_docids_sorter = create_sorter(
|
||||||
|
@ -19,6 +19,8 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
|
|||||||
primary_key_id: FieldId,
|
primary_key_id: FieldId,
|
||||||
(lat_fid, lng_fid): (FieldId, FieldId),
|
(lat_fid, lng_fid): (FieldId, FieldId),
|
||||||
) -> Result<grenad::Reader<File>> {
|
) -> Result<grenad::Reader<File>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let mut writer = create_writer(
|
let mut writer = create_writer(
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -19,6 +19,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
primary_key_id: FieldId,
|
primary_key_id: FieldId,
|
||||||
vectors_fid: FieldId,
|
vectors_fid: FieldId,
|
||||||
) -> Result<grenad::Reader<File>> {
|
) -> Result<grenad::Reader<File>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let mut writer = create_writer(
|
let mut writer = create_writer(
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
|
@ -27,6 +27,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
exact_attributes: &HashSet<FieldId>,
|
exact_attributes: &HashSet<FieldId>,
|
||||||
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut word_docids_sorter = create_sorter(
|
let mut word_docids_sorter = create_sorter(
|
||||||
|
@ -15,6 +15,8 @@ pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
|
|||||||
docid_word_positions: grenad::Reader<R>,
|
docid_word_positions: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
) -> Result<grenad::Reader<File>> {
|
) -> Result<grenad::Reader<File>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut word_fid_docids_sorter = create_sorter(
|
let mut word_fid_docids_sorter = create_sorter(
|
||||||
|
@ -21,6 +21,8 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
|||||||
docid_word_positions: grenad::Reader<R>,
|
docid_word_positions: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
) -> Result<grenad::Reader<File>> {
|
) -> Result<grenad::Reader<File>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut word_pair_proximity_docids_sorter = create_sorter(
|
let mut word_pair_proximity_docids_sorter = create_sorter(
|
||||||
|
@ -18,6 +18,8 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
|||||||
docid_word_positions: grenad::Reader<R>,
|
docid_word_positions: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
) -> Result<grenad::Reader<File>> {
|
) -> Result<grenad::Reader<File>> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
let mut word_position_docids_sorter = create_sorter(
|
let mut word_position_docids_sorter = create_sorter(
|
||||||
|
@ -52,6 +52,8 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
max_positions_per_attributes: Option<u32>,
|
max_positions_per_attributes: Option<u32>,
|
||||||
exact_attributes: HashSet<FieldId>,
|
exact_attributes: HashSet<FieldId>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
original_obkv_chunks
|
original_obkv_chunks
|
||||||
.par_bridge()
|
.par_bridge()
|
||||||
.map(|original_documents_chunk| {
|
.map(|original_documents_chunk| {
|
||||||
@ -238,11 +240,13 @@ fn spawn_extraction_task<FE, FS, M>(
|
|||||||
M::Output: Send,
|
M::Output: Send,
|
||||||
{
|
{
|
||||||
rayon::spawn(move || {
|
rayon::spawn(move || {
|
||||||
|
puffin::profile_scope!("extract_multiple_chunks", name);
|
||||||
let chunks: Result<M> =
|
let chunks: Result<M> =
|
||||||
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
|
chunks.into_par_iter().map(|chunk| extract_fn(chunk, indexer)).collect();
|
||||||
rayon::spawn(move || match chunks {
|
rayon::spawn(move || match chunks {
|
||||||
Ok(chunks) => {
|
Ok(chunks) => {
|
||||||
debug!("merge {} database", name);
|
debug!("merge {} database", name);
|
||||||
|
puffin::profile_scope!("merge_multiple_chunks", name);
|
||||||
let reader = chunks.merge(merge_fn, &indexer);
|
let reader = chunks.merge(merge_fn, &indexer);
|
||||||
let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
|
let _ = lmdb_writer_sx.send(reader.map(serialize_fn));
|
||||||
}
|
}
|
||||||
|
@ -214,6 +214,7 @@ pub fn sorter_into_lmdb_database(
|
|||||||
sorter: Sorter<MergeFn>,
|
sorter: Sorter<MergeFn>,
|
||||||
merge: MergeFn,
|
merge: MergeFn,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
puffin::profile_function!();
|
||||||
debug!("Writing MTBL sorter...");
|
debug!("Writing MTBL sorter...");
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
|
|
||||||
|
@ -137,6 +137,8 @@ where
|
|||||||
mut self,
|
mut self,
|
||||||
reader: DocumentsBatchReader<R>,
|
reader: DocumentsBatchReader<R>,
|
||||||
) -> Result<(Self, StdResult<u64, UserError>)> {
|
) -> Result<(Self, StdResult<u64, UserError>)> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
// Early return when there is no document to add
|
// Early return when there is no document to add
|
||||||
if reader.is_empty() {
|
if reader.is_empty() {
|
||||||
return Ok((self, Ok(0)));
|
return Ok((self, Ok(0)));
|
||||||
@ -175,6 +177,8 @@ where
|
|||||||
mut self,
|
mut self,
|
||||||
to_delete: Vec<String>,
|
to_delete: Vec<String>,
|
||||||
) -> Result<(Self, StdResult<u64, UserError>)> {
|
) -> Result<(Self, StdResult<u64, UserError>)> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
// Early return when there is no document to add
|
// Early return when there is no document to add
|
||||||
if to_delete.is_empty() {
|
if to_delete.is_empty() {
|
||||||
return Ok((self, Ok(0)));
|
return Ok((self, Ok(0)));
|
||||||
@ -194,6 +198,8 @@ where
|
|||||||
|
|
||||||
#[logging_timer::time("IndexDocuments::{}")]
|
#[logging_timer::time("IndexDocuments::{}")]
|
||||||
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
|
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
if self.added_documents == 0 {
|
if self.added_documents == 0 {
|
||||||
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
|
||||||
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
|
return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
|
||||||
@ -232,6 +238,8 @@ where
|
|||||||
FP: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
FA: Fn() -> bool + Sync,
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let TransformOutput {
|
let TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
@ -322,6 +330,7 @@ where
|
|||||||
|
|
||||||
// Run extraction pipeline in parallel.
|
// Run extraction pipeline in parallel.
|
||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
|
puffin::profile_scope!("extract_and_send_grenad_chunks");
|
||||||
// split obkv file into several chunks
|
// split obkv file into several chunks
|
||||||
let original_chunk_iter =
|
let original_chunk_iter =
|
||||||
grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);
|
grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);
|
||||||
@ -477,6 +486,8 @@ where
|
|||||||
FP: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
FA: Fn() -> bool + Sync,
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
// Merged databases are already been indexed, we start from this count;
|
// Merged databases are already been indexed, we start from this count;
|
||||||
let mut databases_seen = MERGED_DATABASE_COUNT;
|
let mut databases_seen = MERGED_DATABASE_COUNT;
|
||||||
|
|
||||||
@ -511,26 +522,36 @@ where
|
|||||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||||
}
|
}
|
||||||
|
|
||||||
let current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
let current_prefix_fst;
|
||||||
|
let common_prefix_fst_words_tmp;
|
||||||
|
let common_prefix_fst_words: Vec<_>;
|
||||||
|
let new_prefix_fst_words;
|
||||||
|
let del_prefix_fst_words;
|
||||||
|
|
||||||
|
{
|
||||||
|
puffin::profile_scope!("compute_prefix_diffs");
|
||||||
|
|
||||||
|
current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?;
|
||||||
|
|
||||||
// We retrieve the common words between the previous and new prefix word fst.
|
// We retrieve the common words between the previous and new prefix word fst.
|
||||||
let common_prefix_fst_words = fst_stream_into_vec(
|
common_prefix_fst_words_tmp = fst_stream_into_vec(
|
||||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).intersection(),
|
previous_words_prefixes_fst.op().add(¤t_prefix_fst).intersection(),
|
||||||
);
|
);
|
||||||
let common_prefix_fst_words: Vec<_> = common_prefix_fst_words
|
common_prefix_fst_words = common_prefix_fst_words_tmp
|
||||||
.as_slice()
|
.as_slice()
|
||||||
.linear_group_by_key(|x| x.chars().next().unwrap())
|
.linear_group_by_key(|x| x.chars().next().unwrap())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
// We retrieve the newly added words between the previous and new prefix word fst.
|
// We retrieve the newly added words between the previous and new prefix word fst.
|
||||||
let new_prefix_fst_words = fst_stream_into_vec(
|
new_prefix_fst_words = fst_stream_into_vec(
|
||||||
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
|
current_prefix_fst.op().add(&previous_words_prefixes_fst).difference(),
|
||||||
);
|
);
|
||||||
|
|
||||||
// We compute the set of prefixes that are no more part of the prefix fst.
|
// We compute the set of prefixes that are no more part of the prefix fst.
|
||||||
let del_prefix_fst_words = fst_stream_into_hashset(
|
del_prefix_fst_words = fst_stream_into_hashset(
|
||||||
previous_words_prefixes_fst.op().add(¤t_prefix_fst).difference(),
|
previous_words_prefixes_fst.op().add(¤t_prefix_fst).difference(),
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
databases_seen += 1;
|
databases_seen += 1;
|
||||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||||
@ -668,6 +689,8 @@ fn execute_word_prefix_docids(
|
|||||||
common_prefix_fst_words: &[&[String]],
|
common_prefix_fst_words: &[&[String]],
|
||||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let cursor = reader.into_cursor()?;
|
let cursor = reader.into_cursor()?;
|
||||||
let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
|
let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db);
|
||||||
builder.chunk_compression_type = indexer_config.chunk_compression_type;
|
builder.chunk_compression_type = indexer_config.chunk_compression_type;
|
||||||
|
@ -558,6 +558,8 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
{
|
{
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let primary_key = self
|
let primary_key = self
|
||||||
.index
|
.index
|
||||||
.primary_key(wtxn)?
|
.primary_key(wtxn)?
|
||||||
|
@ -46,6 +46,66 @@ pub(crate) enum TypedChunk {
|
|||||||
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
|
ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl TypedChunk {
|
||||||
|
pub fn to_debug_string(&self) -> String {
|
||||||
|
match self {
|
||||||
|
TypedChunk::FieldIdDocidFacetStrings(grenad) => {
|
||||||
|
format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::FieldIdDocidFacetNumbers(grenad) => {
|
||||||
|
format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::Documents(grenad) => {
|
||||||
|
format!("Documents {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::FieldIdWordcountDocids(grenad) => {
|
||||||
|
format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::NewDocumentsIds(grenad) => {
|
||||||
|
format!("NewDocumentsIds {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => format!(
|
||||||
|
"WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {} }}",
|
||||||
|
word_docids_reader.len(),
|
||||||
|
exact_word_docids_reader.len()
|
||||||
|
),
|
||||||
|
TypedChunk::WordPositionDocids(grenad) => {
|
||||||
|
format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::WordFidDocids(grenad) => {
|
||||||
|
format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::WordPairProximityDocids(grenad) => {
|
||||||
|
format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::FieldIdFacetStringDocids(grenad) => {
|
||||||
|
format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::FieldIdFacetNumberDocids(grenad) => {
|
||||||
|
format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::FieldIdFacetExistsDocids(grenad) => {
|
||||||
|
format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::FieldIdFacetIsNullDocids(grenad) => {
|
||||||
|
format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => {
|
||||||
|
format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::GeoPoints(grenad) => {
|
||||||
|
format!("GeoPoints {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::VectorPoints(grenad) => {
|
||||||
|
format!("VectorPoints {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
TypedChunk::ScriptLanguageDocids(grenad) => {
|
||||||
|
format!("ScriptLanguageDocids {{ number_of_entries: {} }}", grenad.len())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Write typed chunk in the corresponding LMDB database of the provided index.
|
/// Write typed chunk in the corresponding LMDB database of the provided index.
|
||||||
/// Return new documents seen.
|
/// Return new documents seen.
|
||||||
pub(crate) fn write_typed_chunk_into_index(
|
pub(crate) fn write_typed_chunk_into_index(
|
||||||
@ -54,6 +114,8 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index_is_empty: bool,
|
index_is_empty: bool,
|
||||||
) -> Result<(RoaringBitmap, bool)> {
|
) -> Result<(RoaringBitmap, bool)> {
|
||||||
|
puffin::profile_function!(typed_chunk.to_debug_string());
|
||||||
|
|
||||||
let mut is_merged_database = false;
|
let mut is_merged_database = false;
|
||||||
match typed_chunk {
|
match typed_chunk {
|
||||||
TypedChunk::Documents(obkv_documents_iter) => {
|
TypedChunk::Documents(obkv_documents_iter) => {
|
||||||
@ -350,6 +412,8 @@ where
|
|||||||
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
||||||
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
||||||
{
|
{
|
||||||
|
puffin::profile_function!(format!("number of entries: {}", data.len()));
|
||||||
|
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
let database = database.remap_types::<ByteSlice, ByteSlice>();
|
let database = database.remap_types::<ByteSlice, ByteSlice>();
|
||||||
|
|
||||||
@ -392,6 +456,8 @@ where
|
|||||||
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
|
||||||
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
|
||||||
{
|
{
|
||||||
|
puffin::profile_function!(format!("number of entries: {}", data.len()));
|
||||||
|
|
||||||
if !index_is_empty {
|
if !index_is_empty {
|
||||||
return write_entries_into_database(
|
return write_entries_into_database(
|
||||||
data,
|
data,
|
||||||
|
@ -50,6 +50,8 @@ impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
|
|||||||
common_prefix_fst_words: &[&'a [String]],
|
common_prefix_fst_words: &[&'a [String]],
|
||||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
index_word_prefix_database(
|
index_word_prefix_database(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
self.index.word_pair_proximity_docids,
|
self.index.word_pair_proximity_docids,
|
||||||
|
@ -27,6 +27,8 @@ pub fn index_prefix_word_database(
|
|||||||
chunk_compression_type: CompressionType,
|
chunk_compression_type: CompressionType,
|
||||||
chunk_compression_level: Option<u32>,
|
chunk_compression_level: Option<u32>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let max_proximity = max_proximity - 1;
|
let max_proximity = max_proximity - 1;
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
|
|
||||||
|
@ -191,6 +191,7 @@ pub fn index_word_prefix_database(
|
|||||||
chunk_compression_type: CompressionType,
|
chunk_compression_type: CompressionType,
|
||||||
chunk_compression_level: Option<u32>,
|
chunk_compression_level: Option<u32>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
puffin::profile_function!();
|
||||||
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
|
||||||
|
|
||||||
// Make a prefix trie from the common prefixes that are shorter than self.max_prefix_length
|
// Make a prefix trie from the common prefixes that are shorter than self.max_prefix_length
|
||||||
|
@ -303,6 +303,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
FP: Fn(UpdateIndexingStep) + Sync,
|
FP: Fn(UpdateIndexingStep) + Sync,
|
||||||
FA: Fn() -> bool + Sync,
|
FA: Fn() -> bool + Sync,
|
||||||
{
|
{
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||||
// if the settings are set before any document update, we don't need to do anything, and
|
// if the settings are set before any document update, we don't need to do anything, and
|
||||||
// will set the primary key during the first document addition.
|
// will set the primary key during the first document addition.
|
||||||
|
@ -45,6 +45,8 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
|
|||||||
common_prefix_fst_words: &[&[String]],
|
common_prefix_fst_words: &[&[String]],
|
||||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
// It is forbidden to keep a mutable reference into the database
|
// It is forbidden to keep a mutable reference into the database
|
||||||
// and write into it at the same time, therefore we write into another file.
|
// and write into it at the same time, therefore we write into another file.
|
||||||
let mut prefix_docids_sorter = create_sorter(
|
let mut prefix_docids_sorter = create_sorter(
|
||||||
|
@ -50,6 +50,7 @@ impl<'t, 'u, 'i> WordPrefixIntegerDocids<'t, 'u, 'i> {
|
|||||||
common_prefix_fst_words: &[&[String]],
|
common_prefix_fst_words: &[&[String]],
|
||||||
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
del_prefix_fst_words: &HashSet<Vec<u8>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
puffin::profile_function!();
|
||||||
debug!("Computing and writing the word levels integers docids into LMDB on disk...");
|
debug!("Computing and writing the word levels integers docids into LMDB on disk...");
|
||||||
|
|
||||||
let mut prefix_integer_docids_sorter = create_sorter(
|
let mut prefix_integer_docids_sorter = create_sorter(
|
||||||
|
@ -42,6 +42,8 @@ impl<'t, 'u, 'i> WordsPrefixesFst<'t, 'u, 'i> {
|
|||||||
|
|
||||||
#[logging_timer::time("WordsPrefixesFst::{}")]
|
#[logging_timer::time("WordsPrefixesFst::{}")]
|
||||||
pub fn execute(self) -> Result<()> {
|
pub fn execute(self) -> Result<()> {
|
||||||
|
puffin::profile_function!();
|
||||||
|
|
||||||
let words_fst = self.index.words_fst(self.wtxn)?;
|
let words_fst = self.index.words_fst(self.wtxn)?;
|
||||||
|
|
||||||
let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length];
|
let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length];
|
||||||
|
Loading…
Reference in New Issue
Block a user