diff --git a/.github/workflows/benchmarks-manual.yml b/.github/workflows/benchmarks-manual.yml
index da33bf803..14b77c83d 100644
--- a/.github/workflows/benchmarks-manual.yml
+++ b/.github/workflows/benchmarks-manual.yml
@@ -43,7 +43,7 @@ jobs:
       # Run benchmarks
       - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
         run: |
-          cd benchmarks
+          cd crates/benchmarks
           cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
 
       # Generate critcmp files
diff --git a/.github/workflows/benchmarks-pr.yml b/.github/workflows/benchmarks-pr.yml
index f9d609d6e..a083baa3c 100644
--- a/.github/workflows/benchmarks-pr.yml
+++ b/.github/workflows/benchmarks-pr.yml
@@ -88,7 +88,7 @@ jobs:
       # Run benchmarks
       - name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
         run: |
-          cd benchmarks
+          cd crates/benchmarks
           cargo bench --bench ${{ steps.command.outputs.command-arguments }} -- --save-baseline ${{ steps.file.outputs.basename }}
 
       # Generate critcmp files
diff --git a/.github/workflows/benchmarks-push-indexing.yml b/.github/workflows/benchmarks-push-indexing.yml
index 1fdd5fd67..4495b4b9d 100644
--- a/.github/workflows/benchmarks-push-indexing.yml
+++ b/.github/workflows/benchmarks-push-indexing.yml
@@ -41,7 +41,7 @@ jobs:
       # Run benchmarks
       - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
         run: |
-          cd benchmarks
+          cd crates/benchmarks
           cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
 
       # Generate critcmp files
diff --git a/.github/workflows/benchmarks-push-search-geo.yml b/.github/workflows/benchmarks-push-search-geo.yml
index 82881b41b..22218cd6e 100644
--- a/.github/workflows/benchmarks-push-search-geo.yml
+++ b/.github/workflows/benchmarks-push-search-geo.yml
@@ -40,7 +40,7 @@ jobs:
       # Run benchmarks
       - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
         run: |
-          cd benchmarks
+          cd crates/benchmarks
           cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
 
       # Generate critcmp files
diff --git a/.github/workflows/benchmarks-push-search-songs.yml b/.github/workflows/benchmarks-push-search-songs.yml
index b6169ddf7..e9744a434 100644
--- a/.github/workflows/benchmarks-push-search-songs.yml
+++ b/.github/workflows/benchmarks-push-search-songs.yml
@@ -40,7 +40,7 @@ jobs:
       # Run benchmarks
       - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
         run: |
-          cd benchmarks
+          cd crates/benchmarks
           cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
 
       # Generate critcmp files
diff --git a/.github/workflows/benchmarks-push-search-wiki.yml b/.github/workflows/benchmarks-push-search-wiki.yml
index dd3146a14..bc9e1bcd0 100644
--- a/.github/workflows/benchmarks-push-search-wiki.yml
+++ b/.github/workflows/benchmarks-push-search-wiki.yml
@@ -40,7 +40,7 @@ jobs:
       # Run benchmarks
       - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
         run: |
-          cd benchmarks
+          cd crates/benchmarks
           cargo bench --bench ${BENCH_NAME} -- --save-baseline ${{ steps.file.outputs.basename }}
 
       # Generate critcmp files
diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml
index 016a9d282..c53946fea 100644
--- a/.github/workflows/publish-binaries.yml
+++ b/.github/workflows/publish-binaries.yml
@@ -65,9 +65,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [macos-12, windows-2022]
+        os: [macos-13, windows-2022]
         include:
-          - os: macos-12
+          - os: macos-13
             artifact_name: meilisearch
             asset_name: meilisearch-macos-amd64
           - os: windows-2022
@@ -90,7 +90,7 @@ jobs:
 
   publish-macos-apple-silicon:
     name: Publish binary for macOS silicon
-    runs-on: macos-12
+    runs-on: macos-13
     needs: check-version
     strategy:
       matrix:
diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml
index 7dbd7d866..e142b15b6 100644
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -33,7 +33,7 @@ jobs:
       - name: Setup test with Rust stable
        uses: dtolnay/rust-toolchain@1.79
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.7.5
       - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@@ -51,11 +51,11 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [macos-12, windows-2022]
+        os: [macos-13, windows-2022]
     steps:
       - uses: actions/checkout@v3
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.7.5
       - uses: dtolnay/rust-toolchain@1.79
       - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
@@ -127,7 +127,7 @@ jobs:
          apt-get install build-essential -y
       - uses: dtolnay/rust-toolchain@1.79
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.7.5
       - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
@@ -144,7 +144,7 @@ jobs:
          profile: minimal
          components: clippy
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.7.5
       - name: Run cargo clippy
        uses: actions-rs/cargo@v1
        with:
@@ -163,11 +163,11 @@ jobs:
          override: true
          components: rustfmt
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.7.1
+        uses: Swatinem/rust-cache@v2.7.5
       - name: Run cargo fmt
        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
        # we are going to create an empty file where rustfmt expects it.
        run: |
-          echo -ne "\n" > benchmarks/benches/datasets_paths.rs
+          echo -ne "\n" > crates/benchmarks/benches/datasets_paths.rs
           cargo fmt --all -- --check
diff --git a/.gitignore b/.gitignore
index e00f45c1e..0d6750008 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,7 +5,6 @@
 **/*.json_lines
 **/*.rs.bk
 /*.mdb
-/query-history.txt
 /data.ms
 /snapshots
 /dumps
@@ -19,4 +18,4 @@
 *.snap.new
 
 # Fuzzcheck data for the facet indexing fuzz test
-milli/fuzz/update::facet::incremental::fuzz::fuzz/
+crates/milli/fuzz/update::facet::incremental::fuzz::fuzz/
diff --git a/Cargo.lock b/Cargo.lock
index 4c40d249e..c09c28d25 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -80,7 +80,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb"
 dependencies = [
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -216,7 +216,7 @@ dependencies = [
  "actix-router",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -386,8 +386,28 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
 
 [[package]]
 name = "arroy"
-version = "0.4.0"
-source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dfc5f272f38fa063bbff0a7ab5219404e221493de005e2b4078c62d626ef567e"
+dependencies = [
+ "bytemuck",
+ "byteorder",
+ "heed",
+ "log",
+ "memmap2",
+ "nohash",
+ "ordered-float",
+ "rand",
+ "rayon",
+ "roaring",
+ "tempfile",
+ "thiserror",
+]
+
+[[package]]
+name = "arroy"
+version = "0.5.0"
+source = "git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05#053807bf38dc079f25b003f19fc30fbf3613f6e7"
 dependencies = [
  "bytemuck",
  "byteorder",
@@ -421,7 +441,7 @@ checksum = "6e0c28dcc82d7c8ead5cb13beb15405b57b8546e93215673ff8ca0349a028107"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -527,9 +547,9 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "rustc-hash",
+ "rustc-hash 1.1.0",
  "shlex",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -573,6 +593,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "bitpacking"
+version = "0.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92"
+dependencies = [
+ "crunchy",
+]
+
 [[package]]
 name = "bitvec"
 version = "1.0.1"
@@ -614,7 +643,7 @@ dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
  "syn_derive",
 ]
 
@@ -710,9 +739,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
 
 [[package]]
 name = "bytemuck"
-version = "1.16.1"
+version = "1.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
+checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d"
 dependencies = [
  "bytemuck_derive",
 ]
@@ -725,7 +754,7 @@ checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1035,7 +1064,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1361,7 +1390,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "strsim 0.11.1",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1383,7 +1412,7 @@ checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178"
 dependencies = [
  "darling_core 0.20.9",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1437,7 +1466,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1479,7 +1508,7 @@ dependencies = [
  "darling 0.20.9",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1499,7 +1528,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
 dependencies = [
  "derive_builder_core 0.20.0",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1541,7 +1570,7 @@ dependencies = [
  "convert_case 0.6.0",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1605,7 +1634,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1763,7 +1792,7 @@ dependencies = [
  "heck 0.4.1",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1783,7 +1812,7 @@ checksum = "a1ab991c1362ac86c61ab6f556cff143daa22e5a15e4e189df818b2fd19fe65b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -1974,7 +2003,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -2317,9 +2346,9 @@ dependencies = [
 
 [[package]]
 name = "hashbrown"
-version = "0.14.5"
+version = "0.14.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
+checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
 dependencies = [
  "ahash 0.8.11",
  "allocator-api2",
@@ -2327,9 +2356,9 @@ dependencies = [
 
 [[package]]
 name = "hashbrown"
-version = "0.15.0"
+version = "0.15.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb"
+checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3"
 dependencies = [
  "allocator-api2",
  "equivalent",
@@ -2576,7 +2605,7 @@ name = "index-scheduler"
 version = "1.11.0"
 dependencies = [
  "anyhow",
- "arroy",
+ "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "big_s",
  "bincode",
  "bumpalo",
@@ -2614,7 +2643,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
 dependencies = [
  "equivalent",
- "hashbrown 0.14.5",
+ "hashbrown 0.14.3",
  "serde",
 ]
 
@@ -3266,7 +3295,7 @@ checksum = "915f6d0a2963a27cd5205c1902f32ddfe3bc035816afd268cf88c0fc0f8d287e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -3370,7 +3399,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -3436,6 +3465,7 @@ dependencies = [
  "meilisearch-types",
  "mimalloc",
  "mime",
+ "mopa-maintained",
  "num_cpus",
  "obkv",
  "once_cell",
@@ -3537,6 +3567,7 @@ name = "meilitool"
 version = "1.11.0"
 dependencies = [
  "anyhow",
+ "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)",
  "clap",
  "dump",
  "file-store",
@@ -3567,7 +3598,8 @@ dependencies = [
 name = "milli"
 version = "1.11.0"
 dependencies = [
- "arroy",
+ "allocator-api2",
+ "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "big_s",
  "bimap",
  "bincode",
@@ -3590,7 +3622,7 @@ dependencies = [
  "fxhash",
  "geoutils",
  "grenad",
- "hashbrown 0.14.5",
+ "hashbrown 0.15.1",
  "heed",
  "hf-hub",
  "indexmap",
@@ -3615,6 +3647,7 @@ dependencies = [
  "rhai",
  "roaring",
  "rstar",
+ "rustc-hash 2.0.0",
  "serde",
  "serde_json",
  "slice-group-by",
@@ -3703,9 +3736,15 @@ checksum = "371717c0a5543d6a800cac822eac735aa7d2d2fbb41002e9856a4089532dbdce"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
+[[package]]
+name = "mopa-maintained"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79b7f3e22167862cc7c95b21a6f326c22e4bf40da59cbf000b368a310173ba11"
+
 [[package]]
 name = "mutually_exclusive_features"
 version = "0.0.3"
@@ -3833,7 +3872,7 @@ dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4044,7 +4083,7 @@ dependencies = [
  "pest_meta",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4098,7 +4137,7 @@ dependencies = [
  "phf_shared",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4127,7 +4166,7 @@ checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4244,9 +4283,9 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.81"
+version = "1.0.89"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba"
+checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e"
 dependencies = [
  "unicode-ident",
 ]
@@ -4339,7 +4378,7 @@ dependencies = [
  "pin-project-lite",
  "quinn-proto",
  "quinn-udp",
- "rustc-hash",
+ "rustc-hash 1.1.0",
  "rustls",
  "thiserror",
  "tokio",
@@ -4348,14 +4387,14 @@ dependencies = [
 
 [[package]]
 name = "quinn-proto"
-version = "0.11.3"
+version = "0.11.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe"
+checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6"
 dependencies = [
  "bytes",
  "rand",
  "ring",
- "rustc-hash",
+ "rustc-hash 2.0.0",
  "rustls",
  "slab",
  "thiserror",
@@ -4434,11 +4473,12 @@ dependencies = [
 [[package]]
 name = "raw-collections"
 version = "0.1.0"
-source = "git+https://github.com/dureuill/raw-collections.git#4ab9619207632c20f4e0c2e126d9d909cc58ef65"
+source = "git+https://github.com/dureuill/raw-collections.git#e04a52424e1124ca63df66338a79c628e8f3bfd7"
 dependencies = [
  "allocator-api2",
+ "bitpacking",
  "bumpalo",
- "hashbrown 0.15.0",
+ "hashbrown 0.15.1",
  "serde",
  "serde_json",
 ]
@@ -4640,7 +4680,7 @@ source = "git+https://github.com/rhaiscript/rhai?rev=ef3df63121d27aacd838f366f2b
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -4737,6 +4777,12 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
 
+[[package]]
+name = "rustc-hash"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
+
 [[package]]
 name = "rustc_version"
 version = "0.4.0"
@@ -4875,9 +4921,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
 
 [[package]]
 name = "serde"
-version = "1.0.210"
+version = "1.0.214"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
+checksum = "f55c3193aca71c12ad7890f1785d2b73e1b9f63a0bbc353c08ef26fe03fc56b5"
 dependencies = [
  "serde_derive",
 ]
@@ -4893,20 +4939,20 @@ dependencies = [
 
 [[package]]
 name = "serde_derive"
-version = "1.0.210"
+version = "1.0.214"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
+checksum = "de523f781f095e28fa605cdce0f8307e451cc0fd14e2eb4cd2e98a355b147766"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.128"
+version = "1.0.132"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8"
+checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03"
 dependencies = [
  "indexmap",
  "itoa",
@@ -5193,7 +5239,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -5215,9 +5261,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.60"
+version = "2.0.87"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3"
+checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -5233,7 +5279,7 @@ dependencies = [
  "proc-macro-error",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -5259,7 +5305,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -5364,7 +5410,7 @@ checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -5389,7 +5435,7 @@ dependencies = [
  "fancy-regex 0.12.0",
  "lazy_static",
  "parking_lot",
- "rustc-hash",
+ "rustc-hash 1.1.0",
 ]
 
 [[package]]
@@ -5516,7 +5562,7 @@ checksum = "5f5ae998a069d4b5aba8ee9dad856af7d520c3699e6159b185c2acd48155d39a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -5648,7 +5694,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -5994,7 +6040,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
  "wasm-bindgen-shared",
 ]
 
@@ -6028,7 +6074,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -6077,7 +6123,7 @@ version = "0.16.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0"
 dependencies = [
- "hashbrown 0.14.5",
+ "hashbrown 0.14.3",
  "once_cell",
 ]
 
@@ -6461,7 +6507,7 @@ checksum = "9e6936f0cce458098a201c245a11bef556c6a0181129c7034d10d76d1ec3a2b8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
  "synstructure",
 ]
 
@@ -6482,7 +6528,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
@@ -6502,7 +6548,7 @@ checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
  "synstructure",
 ]
 
@@ -6523,7 +6569,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.60",
+ "syn 2.0.87",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 1d25b9795..68e049f7e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,24 +1,24 @@
 [workspace]
 resolver = "2"
 members = [
-    "meilisearch",
-    "meilitool",
-    "meilisearch-types",
-    "meilisearch-auth",
-    "meili-snap",
-    "index-scheduler",
-    "dump",
-    "file-store",
-    "permissive-json-pointer",
-    "milli",
-    "filter-parser",
-    "flatten-serde-json",
-    "json-depth-checker",
-    "benchmarks",
-    "fuzzers",
-    "tracing-trace",
-    "xtask",
-    "build-info",
+    "crates/meilisearch",
+    "crates/meilitool",
+    "crates/meilisearch-types",
+    "crates/meilisearch-auth",
+    "crates/meili-snap",
+    "crates/index-scheduler",
+    "crates/dump",
+    "crates/file-store",
+    "crates/permissive-json-pointer",
+    "crates/milli",
+    "crates/filter-parser",
+    "crates/flatten-serde-json",
+    "crates/json-depth-checker",
+    "crates/benchmarks",
+    "crates/fuzzers",
+    "crates/tracing-trace",
+    "crates/xtask",
+    "crates/build-info",
 ]
 
 [workspace.package]
diff --git a/Dockerfile b/Dockerfile
index 84d1da8f5..04557df59 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,6 +21,7 @@ RUN set -eux; \
 
 # Run
 FROM alpine:3.20
+LABEL org.opencontainers.image.source="https://github.com/meilisearch/meilisearch"
 
 ENV MEILI_HTTP_ADDR 0.0.0.0:7700
 ENV MEILI_SERVER_PROVIDER docker
diff --git a/README.md b/README.md
index 59d618ab2..4be92d439 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,9 @@
[logo banner hunk: two lines removed and five added; the HTML image markup did not survive extraction and is not recoverable]
diff --git a/assets/meilisearch-logo-kawaii.png b/assets/meilisearch-logo-kawaii.png
deleted file mode 100644
index 40dc0cb0a..000000000
Binary files a/assets/meilisearch-logo-kawaii.png and /dev/null differ
diff --git a/bors.toml b/bors.toml
index 8750ed993..96e9ef65e 100644
--- a/bors.toml
+++ b/bors.toml
@@ -1,6 +1,6 @@
 status = [
   'Tests on ubuntu-20.04',
-  'Tests on macos-12',
+  'Tests on macos-13',
   'Tests on windows-2022',
   'Run Clippy',
   'Run Rustfmt',
diff --git a/benchmarks/.gitignore b/crates/benchmarks/.gitignore
similarity index 100%
rename from benchmarks/.gitignore
rename to crates/benchmarks/.gitignore
diff --git a/benchmarks/Cargo.toml b/crates/benchmarks/Cargo.toml
similarity index 100%
rename from benchmarks/Cargo.toml
rename to crates/benchmarks/Cargo.toml
diff --git a/benchmarks/README.md b/crates/benchmarks/README.md
similarity index 100%
rename from benchmarks/README.md
rename to crates/benchmarks/README.md
diff --git a/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs
similarity index 100%
rename from benchmarks/benches/indexing.rs
rename to crates/benchmarks/benches/indexing.rs
diff --git a/benchmarks/benches/search_geo.rs b/crates/benchmarks/benches/search_geo.rs
similarity index 100%
rename from benchmarks/benches/search_geo.rs
rename to crates/benchmarks/benches/search_geo.rs
diff --git a/benchmarks/benches/search_songs.rs b/crates/benchmarks/benches/search_songs.rs
similarity index 100%
rename from benchmarks/benches/search_songs.rs
rename to crates/benchmarks/benches/search_songs.rs
diff --git a/benchmarks/benches/search_wiki.rs b/crates/benchmarks/benches/search_wiki.rs
similarity index 100%
rename from benchmarks/benches/search_wiki.rs
rename to crates/benchmarks/benches/search_wiki.rs
diff --git a/benchmarks/benches/utils.rs b/crates/benchmarks/benches/utils.rs
similarity index 100%
rename from benchmarks/benches/utils.rs
rename to crates/benchmarks/benches/utils.rs
diff --git a/benchmarks/build.rs b/crates/benchmarks/build.rs
similarity index 100%
rename from benchmarks/build.rs
rename to crates/benchmarks/build.rs
diff --git a/benchmarks/scripts/compare.sh b/crates/benchmarks/scripts/compare.sh
similarity index 100%
rename from benchmarks/scripts/compare.sh
rename to crates/benchmarks/scripts/compare.sh
diff --git a/benchmarks/scripts/list.sh b/crates/benchmarks/scripts/list.sh
similarity index 100%
rename from benchmarks/scripts/list.sh
rename to crates/benchmarks/scripts/list.sh
diff --git a/benchmarks/src/lib.rs b/crates/benchmarks/src/lib.rs
similarity index 100%
rename from benchmarks/src/lib.rs
rename to crates/benchmarks/src/lib.rs
diff --git a/build-info/Cargo.toml b/crates/build-info/Cargo.toml
similarity index 100%
rename from build-info/Cargo.toml
rename to crates/build-info/Cargo.toml
diff --git a/build-info/build.rs b/crates/build-info/build.rs
similarity index 100%
rename from build-info/build.rs
rename to crates/build-info/build.rs
diff --git a/build-info/src/lib.rs b/crates/build-info/src/lib.rs
similarity index 100%
rename from build-info/src/lib.rs
rename to crates/build-info/src/lib.rs
diff --git a/dump/Cargo.toml b/crates/dump/Cargo.toml
similarity index 100%
rename from dump/Cargo.toml
rename to crates/dump/Cargo.toml
diff --git a/dump/README.md b/crates/dump/README.md
similarity index 100%
rename from dump/README.md
rename to crates/dump/README.md
diff --git a/dump/src/error.rs b/crates/dump/src/error.rs
similarity index 100%
rename from dump/src/error.rs
rename to crates/dump/src/error.rs
diff --git a/dump/src/lib.rs b/crates/dump/src/lib.rs
similarity index 100%
rename from dump/src/lib.rs
rename to crates/dump/src/lib.rs
diff --git a/dump/src/reader/compat/mod.rs b/crates/dump/src/reader/compat/mod.rs
similarity index 100%
rename from dump/src/reader/compat/mod.rs
rename to crates/dump/src/reader/compat/mod.rs
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-3.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-3.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-3.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-3.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-6.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-6.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-6.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-6.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-9.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-9.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-9.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v1_to_v2__test__compat_v1_v2-9.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-11.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-11.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-11.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-11.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-14.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-14.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-14.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-14.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-5.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-5.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-5.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-5.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-8.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-8.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-8.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v2_to_v3__test__compat_v2_v3-8.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-12.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-12.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-12.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-12.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-15.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-15.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-15.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-15.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-6.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-6.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-6.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-6.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-9.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-9.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-9.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v3_to_v4__test__compat_v3_v4-9.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-12.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-12.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-12.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-12.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-6.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-6.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-6.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-6.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-9.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-9.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-9.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v4_to_v5__test__compat_v4_v5-9.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-12.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-12.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-12.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-12.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-6.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-6.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-6.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-6.snap
diff --git a/dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-9.snap b/crates/dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-9.snap
similarity index 100%
rename from dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-9.snap
rename to crates/dump/src/reader/compat/snapshots/dump__reader__compat__v5_to_v6__test__compat_v5_v6-9.snap
diff --git a/dump/src/reader/compat/v1_to_v2.rs b/crates/dump/src/reader/compat/v1_to_v2.rs
similarity index 100%
rename from dump/src/reader/compat/v1_to_v2.rs
rename to crates/dump/src/reader/compat/v1_to_v2.rs
diff --git a/dump/src/reader/compat/v2_to_v3.rs b/crates/dump/src/reader/compat/v2_to_v3.rs
similarity index 100%
rename from dump/src/reader/compat/v2_to_v3.rs
rename to crates/dump/src/reader/compat/v2_to_v3.rs
diff --git a/dump/src/reader/compat/v3_to_v4.rs b/crates/dump/src/reader/compat/v3_to_v4.rs
similarity index 100%
rename from dump/src/reader/compat/v3_to_v4.rs
rename to crates/dump/src/reader/compat/v3_to_v4.rs
diff --git a/dump/src/reader/compat/v4_to_v5.rs b/crates/dump/src/reader/compat/v4_to_v5.rs
similarity index 100%
rename from dump/src/reader/compat/v4_to_v5.rs
rename to crates/dump/src/reader/compat/v4_to_v5.rs
diff --git a/dump/src/reader/compat/v5_to_v6.rs b/crates/dump/src/reader/compat/v5_to_v6.rs
similarity index 100%
rename from dump/src/reader/compat/v5_to_v6.rs
rename to crates/dump/src/reader/compat/v5_to_v6.rs
diff --git a/dump/src/reader/mod.rs b/crates/dump/src/reader/mod.rs
similarity index 100%
rename from dump/src/reader/mod.rs
rename to crates/dump/src/reader/mod.rs
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-10.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-4.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v1-7.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-11.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-11.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v2-11.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-11.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-14.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-14.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v2-14.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-14.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-5.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-5.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v2-5.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-5.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-8.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-8.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v2-8.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2-8.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-11.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-11.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-11.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-11.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-5.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-5.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-5.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-5.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-8.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-8.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-8.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v2_from_meilisearch_v0_22_0_issue_3435-8.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-11.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-11.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v3-11.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-11.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-14.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-14.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v3-14.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-14.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-5.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-5.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v3-5.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-5.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-8.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-8.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v3-8.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v3-8.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v4-12.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v4-12.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v4-12.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v4-12.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v4-6.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v4-6.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v4-6.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v4-6.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v4-9.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v4-9.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v4-9.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v4-9.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v5-12.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v5-12.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v5-12.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v5-12.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v5-6.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v5-6.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v5-6.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v5-6.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v5-9.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v5-9.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v5-9.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v5-9.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap
diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap b/crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap
similarity index 100%
rename from dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap
rename to crates/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap
diff --git a/dump/src/reader/v1/mod.rs b/crates/dump/src/reader/v1/mod.rs
similarity index 100%
rename from dump/src/reader/v1/mod.rs
rename to crates/dump/src/reader/v1/mod.rs
diff --git a/dump/src/reader/v1/settings.rs b/crates/dump/src/reader/v1/settings.rs
similarity index 100%
rename from dump/src/reader/v1/settings.rs
rename to crates/dump/src/reader/v1/settings.rs
diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap b/crates/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap
similarity index 100%
rename from dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap
rename to crates/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-10.snap
diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap b/crates/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap
similarity index 100%
rename from dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap
rename to crates/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-2.snap
diff --git a/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap b/crates/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap
similarity index 100%
rename from dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap
rename to crates/dump/src/reader/v1/snapshots/dump__reader__v1__test__read_dump_v1-6.snap
diff --git a/dump/src/reader/v1/update.rs b/crates/dump/src/reader/v1/update.rs
similarity index 100%
rename from dump/src/reader/v1/update.rs
rename to crates/dump/src/reader/v1/update.rs
diff --git a/dump/src/reader/v2/errors.rs b/crates/dump/src/reader/v2/errors.rs
similarity index 100%
rename from dump/src/reader/v2/errors.rs
rename to crates/dump/src/reader/v2/errors.rs
diff --git a/dump/src/reader/v2/meta.rs b/crates/dump/src/reader/v2/meta.rs
similarity index 100%
rename from dump/src/reader/v2/meta.rs
rename to crates/dump/src/reader/v2/meta.rs
diff --git a/dump/src/reader/v2/mod.rs b/crates/dump/src/reader/v2/mod.rs
similarity index 100%
rename from dump/src/reader/v2/mod.rs
rename to crates/dump/src/reader/v2/mod.rs
diff --git a/dump/src/reader/v2/settings.rs b/crates/dump/src/reader/v2/settings.rs
similarity index 100%
rename from dump/src/reader/v2/settings.rs
rename to crates/dump/src/reader/v2/settings.rs
diff --git a/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-11.snap b/crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-11.snap
similarity index 100%
rename from dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-11.snap
rename to crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-11.snap
diff --git a/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-14.snap b/crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-14.snap
similarity index 100%
rename from dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-14.snap
rename to crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-14.snap
diff --git a/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-5.snap b/crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-5.snap
similarity index 100%
rename from dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-5.snap
rename to crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-5.snap
diff --git a/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-8.snap b/crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-8.snap
similarity index 100%
rename from dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-8.snap
rename to crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2-8.snap
diff --git a/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-10.snap b/crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-10.snap
similarity index 100%
rename from dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-10.snap
rename to crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-10.snap
diff --git a/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-4.snap b/crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-4.snap
similarity index 100%
rename from dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-4.snap
rename to crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-4.snap
diff --git a/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-7.snap b/crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-7.snap
similarity index 100%
rename from dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-7.snap
rename to crates/dump/src/reader/v2/snapshots/dump__reader__v2__test__read_dump_v2_from_meilisearch_v0_22_0_issue_3435-7.snap
diff --git a/dump/src/reader/v2/updates.rs b/crates/dump/src/reader/v2/updates.rs
similarity index 100%
rename from dump/src/reader/v2/updates.rs
rename to crates/dump/src/reader/v2/updates.rs
diff --git a/dump/src/reader/v3/errors.rs b/crates/dump/src/reader/v3/errors.rs
similarity index 100%
rename from dump/src/reader/v3/errors.rs
rename to crates/dump/src/reader/v3/errors.rs
diff --git a/dump/src/reader/v3/meta.rs b/crates/dump/src/reader/v3/meta.rs
similarity index 100%
rename from dump/src/reader/v3/meta.rs
rename to crates/dump/src/reader/v3/meta.rs
diff --git a/dump/src/reader/v3/mod.rs b/crates/dump/src/reader/v3/mod.rs
similarity index 100%
rename from dump/src/reader/v3/mod.rs
rename to crates/dump/src/reader/v3/mod.rs
diff --git a/dump/src/reader/v3/settings.rs b/crates/dump/src/reader/v3/settings.rs
similarity index 100%
rename from dump/src/reader/v3/settings.rs
rename to crates/dump/src/reader/v3/settings.rs
diff --git a/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-11.snap b/crates/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-11.snap
similarity index 100%
rename from dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-11.snap
rename to crates/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-11.snap
diff --git a/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-14.snap b/crates/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-14.snap
similarity index 100%
rename from dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-14.snap
rename to crates/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-14.snap
diff --git a/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-5.snap b/crates/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-5.snap
similarity index 100%
rename from dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-5.snap
rename to crates/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-5.snap
diff --git a/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-8.snap b/crates/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-8.snap
similarity index 100%
rename from dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-8.snap
rename to crates/dump/src/reader/v3/snapshots/dump__reader__v3__test__read_dump_v3-8.snap
diff --git a/dump/src/reader/v3/updates.rs b/crates/dump/src/reader/v3/updates.rs
similarity index 100%
rename from dump/src/reader/v3/updates.rs
rename to crates/dump/src/reader/v3/updates.rs
diff --git a/dump/src/reader/v4/errors.rs b/crates/dump/src/reader/v4/errors.rs
similarity index 100%
rename from dump/src/reader/v4/errors.rs
rename to crates/dump/src/reader/v4/errors.rs
diff --git a/dump/src/reader/v4/keys.rs b/crates/dump/src/reader/v4/keys.rs
similarity index 100%
rename from dump/src/reader/v4/keys.rs
rename to crates/dump/src/reader/v4/keys.rs
diff --git a/dump/src/reader/v4/meta.rs b/crates/dump/src/reader/v4/meta.rs
similarity index 97%
rename from dump/src/reader/v4/meta.rs
rename to crates/dump/src/reader/v4/meta.rs
index cec05f57c..2daea68a4 100644
--- a/dump/src/reader/v4/meta.rs
+++ b/crates/dump/src/reader/v4/meta.rs
@@ -74,7 +74,8 @@ impl Display for IndexUidFormatError {
             f,
             "invalid index uid `{}`, the uid must be an integer \
              or a string containing only alphanumeric characters \
-             a-z A-Z 0-9, hyphens - and underscores _.",
+             a-z A-Z 0-9, hyphens - and underscores _, \
+             and can not be more than 400 bytes.",
             self.invalid_uid,
         )
     }
diff --git a/dump/src/reader/v4/mod.rs b/crates/dump/src/reader/v4/mod.rs
similarity index 100%
rename from dump/src/reader/v4/mod.rs
rename to crates/dump/src/reader/v4/mod.rs
diff --git a/dump/src/reader/v4/settings.rs b/crates/dump/src/reader/v4/settings.rs
similarity index 100%
rename from dump/src/reader/v4/settings.rs
rename to crates/dump/src/reader/v4/settings.rs
diff --git a/dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-10.snap b/crates/dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-10.snap
similarity index 100%
rename from dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-10.snap
rename to crates/dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-10.snap
diff --git a/dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-13.snap b/crates/dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-13.snap
similarity index 100%
rename from dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-13.snap
rename to crates/dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-13.snap
diff --git a/dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-7.snap b/crates/dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-7.snap
similarity index 100%
rename from dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-7.snap
rename to crates/dump/src/reader/v4/snapshots/dump__reader__v4__test__read_dump_v4-7.snap
diff --git a/dump/src/reader/v4/tasks.rs b/crates/dump/src/reader/v4/tasks.rs
similarity index 100%
rename from dump/src/reader/v4/tasks.rs
rename to crates/dump/src/reader/v4/tasks.rs
diff --git a/dump/src/reader/v5/errors.rs b/crates/dump/src/reader/v5/errors.rs
similarity index 100%
rename from dump/src/reader/v5/errors.rs
rename to crates/dump/src/reader/v5/errors.rs
diff --git a/dump/src/reader/v5/keys.rs b/crates/dump/src/reader/v5/keys.rs
similarity index 100%
rename from dump/src/reader/v5/keys.rs
rename to crates/dump/src/reader/v5/keys.rs
diff --git a/dump/src/reader/v5/meta.rs b/crates/dump/src/reader/v5/meta.rs
similarity index 97%
rename from dump/src/reader/v5/meta.rs
rename to crates/dump/src/reader/v5/meta.rs
index cec05f57c..2daea68a4 100644
--- a/dump/src/reader/v5/meta.rs
+++ b/crates/dump/src/reader/v5/meta.rs
@@ -74,7 +74,8 @@ impl Display for IndexUidFormatError {
             f,
             "invalid index uid `{}`, the uid must be an integer \
              or a string containing only alphanumeric characters \
-             a-z A-Z 0-9, hyphens - and underscores _.",
+             a-z A-Z 0-9, hyphens - and underscores _, \
+             and can not be more than 400 bytes.",
             self.invalid_uid,
         )
     }
diff --git a/dump/src/reader/v5/mod.rs b/crates/dump/src/reader/v5/mod.rs
similarity index 100%
rename from dump/src/reader/v5/mod.rs
rename to crates/dump/src/reader/v5/mod.rs
diff --git a/dump/src/reader/v5/settings.rs b/crates/dump/src/reader/v5/settings.rs
similarity index 100%
rename from dump/src/reader/v5/settings.rs
rename to crates/dump/src/reader/v5/settings.rs
diff --git a/dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-10.snap b/crates/dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-10.snap
similarity index 100%
rename from dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-10.snap
rename to crates/dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-10.snap
diff --git a/dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-13.snap b/crates/dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-13.snap
similarity index 100%
rename from dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-13.snap
rename to crates/dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-13.snap
diff --git a/dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-7.snap b/crates/dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-7.snap
similarity index 100%
rename from dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-7.snap
rename to crates/dump/src/reader/v5/snapshots/dump__reader__v5__test__read_dump_v5-7.snap
diff --git a/dump/src/reader/v5/tasks.rs b/crates/dump/src/reader/v5/tasks.rs
similarity index 100%
rename from dump/src/reader/v5/tasks.rs
rename to crates/dump/src/reader/v5/tasks.rs
diff --git a/dump/src/reader/v6/mod.rs b/crates/dump/src/reader/v6/mod.rs
diff --git a/dump/src/reader/v6/mod.rs b/crates/dump/src/reader/v6/mod.rs similarity index 100% rename from dump/src/reader/v6/mod.rs rename to crates/dump/src/reader/v6/mod.rs diff --git a/dump/src/writer.rs b/crates/dump/src/writer.rs similarity index 100% rename from dump/src/writer.rs rename to crates/dump/src/writer.rs diff --git a/dump/tests/assets/v1.dump b/crates/dump/tests/assets/v1.dump similarity index 100% rename from dump/tests/assets/v1.dump rename to crates/dump/tests/assets/v1.dump diff --git a/dump/tests/assets/v2-v0.22.0.dump b/crates/dump/tests/assets/v2-v0.22.0.dump similarity index 100% rename from dump/tests/assets/v2-v0.22.0.dump rename to crates/dump/tests/assets/v2-v0.22.0.dump diff --git a/dump/tests/assets/v2.dump b/crates/dump/tests/assets/v2.dump similarity index 100% rename from dump/tests/assets/v2.dump rename to crates/dump/tests/assets/v2.dump diff --git a/dump/tests/assets/v3.dump b/crates/dump/tests/assets/v3.dump similarity index 100% rename from dump/tests/assets/v3.dump rename to crates/dump/tests/assets/v3.dump diff --git a/dump/tests/assets/v4.dump b/crates/dump/tests/assets/v4.dump similarity index 100% rename from dump/tests/assets/v4.dump rename to crates/dump/tests/assets/v4.dump diff --git a/dump/tests/assets/v5.dump b/crates/dump/tests/assets/v5.dump similarity index 100% rename from dump/tests/assets/v5.dump rename to crates/dump/tests/assets/v5.dump diff --git a/dump/tests/assets/v6-with-experimental.dump b/crates/dump/tests/assets/v6-with-experimental.dump similarity index 100% rename from dump/tests/assets/v6-with-experimental.dump rename to crates/dump/tests/assets/v6-with-experimental.dump diff --git a/dump/tests/assets/v6-with-vectors.dump b/crates/dump/tests/assets/v6-with-vectors.dump similarity index 100% rename from dump/tests/assets/v6-with-vectors.dump rename to crates/dump/tests/assets/v6-with-vectors.dump diff --git a/file-store/Cargo.toml b/crates/file-store/Cargo.toml similarity index 100% rename from file-store/Cargo.toml rename to crates/file-store/Cargo.toml diff --git a/file-store/src/lib.rs b/crates/file-store/src/lib.rs similarity index 100% rename from file-store/src/lib.rs rename to crates/file-store/src/lib.rs diff --git a/filter-parser/Cargo.toml b/crates/filter-parser/Cargo.toml similarity index 100% rename from filter-parser/Cargo.toml rename to crates/filter-parser/Cargo.toml diff --git a/filter-parser/README.md b/crates/filter-parser/README.md similarity index 100% rename from filter-parser/README.md rename to crates/filter-parser/README.md diff --git a/filter-parser/fuzz/.gitignore b/crates/filter-parser/fuzz/.gitignore similarity index 100% rename from filter-parser/fuzz/.gitignore rename to crates/filter-parser/fuzz/.gitignore diff --git a/filter-parser/fuzz/Cargo.toml b/crates/filter-parser/fuzz/Cargo.toml similarity index 100% rename from filter-parser/fuzz/Cargo.toml rename to crates/filter-parser/fuzz/Cargo.toml diff --git a/filter-parser/fuzz/corpus/parse/test_1 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_1 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_1 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_1 diff --git a/filter-parser/fuzz/corpus/parse/test_10 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_10 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_10 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_10 diff --git a/filter-parser/fuzz/corpus/parse/test_11 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_11 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_11 rename to
crates/filter-parser/fuzz/fuzz/corpus/parse/test_11 diff --git a/filter-parser/fuzz/corpus/parse/test_12 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_12 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_12 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_12 diff --git a/filter-parser/fuzz/corpus/parse/test_13 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_13 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_13 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_13 diff --git a/filter-parser/fuzz/corpus/parse/test_14 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_14 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_14 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_14 diff --git a/filter-parser/fuzz/corpus/parse/test_15 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_15 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_15 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_15 diff --git a/filter-parser/fuzz/corpus/parse/test_16 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_16 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_16 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_16 diff --git a/filter-parser/fuzz/corpus/parse/test_17 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_17 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_17 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_17 diff --git a/filter-parser/fuzz/corpus/parse/test_18 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_18 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_18 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_18 diff --git a/filter-parser/fuzz/corpus/parse/test_19 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_19 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_19 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_19 diff --git a/filter-parser/fuzz/corpus/parse/test_2 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_2 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_2 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_2 diff --git a/filter-parser/fuzz/corpus/parse/test_20 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_20 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_20 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_20 diff --git a/filter-parser/fuzz/corpus/parse/test_21 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_21 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_21 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_21 diff --git a/filter-parser/fuzz/corpus/parse/test_22 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_22 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_22 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_22 diff --git a/filter-parser/fuzz/corpus/parse/test_23 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_23 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_23 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_23 diff --git a/filter-parser/fuzz/corpus/parse/test_24 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_24 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_24 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_24 diff --git a/filter-parser/fuzz/corpus/parse/test_25 
b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_25 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_25 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_25 diff --git a/filter-parser/fuzz/corpus/parse/test_26 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_26 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_26 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_26 diff --git a/filter-parser/fuzz/corpus/parse/test_27 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_27 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_27 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_27 diff --git a/filter-parser/fuzz/corpus/parse/test_28 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_28 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_28 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_28 diff --git a/filter-parser/fuzz/corpus/parse/test_29 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_29 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_29 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_29 diff --git a/filter-parser/fuzz/corpus/parse/test_3 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_3 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_3 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_3 diff --git a/filter-parser/fuzz/corpus/parse/test_30 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_30 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_30 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_30 diff --git a/filter-parser/fuzz/corpus/parse/test_31 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_31 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_31 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_31 diff --git a/filter-parser/fuzz/corpus/parse/test_32 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_32 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_32 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_32 diff --git a/filter-parser/fuzz/corpus/parse/test_33 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_33 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_33 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_33 diff --git a/filter-parser/fuzz/corpus/parse/test_34 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_34 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_34 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_34 diff --git a/filter-parser/fuzz/corpus/parse/test_35 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_35 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_35 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_35 diff --git a/filter-parser/fuzz/corpus/parse/test_36 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_36 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_36 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_36 diff --git a/filter-parser/fuzz/corpus/parse/test_37 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_37 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_37 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_37 diff --git a/filter-parser/fuzz/corpus/parse/test_38 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_38 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_38 rename to 
crates/filter-parser/fuzz/fuzz/corpus/parse/test_38 diff --git a/filter-parser/fuzz/corpus/parse/test_39 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_39 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_39 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_39 diff --git a/filter-parser/fuzz/corpus/parse/test_4 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_4 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_4 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_4 diff --git a/filter-parser/fuzz/corpus/parse/test_40 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_40 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_40 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_40 diff --git a/filter-parser/fuzz/corpus/parse/test_41 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_41 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_41 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_41 diff --git a/filter-parser/fuzz/corpus/parse/test_42 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_42 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_42 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_42 diff --git a/filter-parser/fuzz/corpus/parse/test_43 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_43 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_43 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_43 diff --git a/filter-parser/fuzz/corpus/parse/test_5 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_5 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_5 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_5 diff --git a/filter-parser/fuzz/corpus/parse/test_6 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_6 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_6 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_6 diff --git a/filter-parser/fuzz/corpus/parse/test_7 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_7 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_7 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_7 diff --git a/filter-parser/fuzz/corpus/parse/test_8 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_8 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_8 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_8 diff --git a/filter-parser/fuzz/corpus/parse/test_9 b/crates/filter-parser/fuzz/fuzz/corpus/parse/test_9 similarity index 100% rename from filter-parser/fuzz/corpus/parse/test_9 rename to crates/filter-parser/fuzz/fuzz/corpus/parse/test_9 diff --git a/filter-parser/fuzz/fuzz_targets/parse.rs b/crates/filter-parser/fuzz/fuzz_targets/parse.rs similarity index 100% rename from filter-parser/fuzz/fuzz_targets/parse.rs rename to crates/filter-parser/fuzz/fuzz_targets/parse.rs diff --git a/filter-parser/src/condition.rs b/crates/filter-parser/src/condition.rs similarity index 100% rename from filter-parser/src/condition.rs rename to crates/filter-parser/src/condition.rs diff --git a/filter-parser/src/error.rs b/crates/filter-parser/src/error.rs similarity index 100% rename from filter-parser/src/error.rs rename to crates/filter-parser/src/error.rs diff --git a/filter-parser/src/lib.rs b/crates/filter-parser/src/lib.rs similarity index 100% rename from filter-parser/src/lib.rs rename to crates/filter-parser/src/lib.rs diff --git a/filter-parser/src/main.rs 
b/crates/filter-parser/src/main.rs similarity index 100% rename from filter-parser/src/main.rs rename to crates/filter-parser/src/main.rs diff --git a/filter-parser/src/value.rs b/crates/filter-parser/src/value.rs similarity index 100% rename from filter-parser/src/value.rs rename to crates/filter-parser/src/value.rs diff --git a/flatten-serde-json/Cargo.toml b/crates/flatten-serde-json/Cargo.toml similarity index 100% rename from flatten-serde-json/Cargo.toml rename to crates/flatten-serde-json/Cargo.toml diff --git a/flatten-serde-json/README.md b/crates/flatten-serde-json/README.md similarity index 100% rename from flatten-serde-json/README.md rename to crates/flatten-serde-json/README.md diff --git a/flatten-serde-json/benches/benchmarks.rs b/crates/flatten-serde-json/benches/benchmarks.rs similarity index 100% rename from flatten-serde-json/benches/benchmarks.rs rename to crates/flatten-serde-json/benches/benchmarks.rs diff --git a/flatten-serde-json/fuzz/Cargo.toml b/crates/flatten-serde-json/fuzz/Cargo.toml similarity index 100% rename from flatten-serde-json/fuzz/Cargo.toml rename to crates/flatten-serde-json/fuzz/Cargo.toml diff --git a/flatten-serde-json/fuzz/fuzz_targets/flatten.rs b/crates/flatten-serde-json/fuzz/fuzz_targets/flatten.rs similarity index 100% rename from flatten-serde-json/fuzz/fuzz_targets/flatten.rs rename to crates/flatten-serde-json/fuzz/fuzz_targets/flatten.rs diff --git a/flatten-serde-json/src/lib.rs b/crates/flatten-serde-json/src/lib.rs similarity index 100% rename from flatten-serde-json/src/lib.rs rename to crates/flatten-serde-json/src/lib.rs diff --git a/flatten-serde-json/src/main.rs b/crates/flatten-serde-json/src/main.rs similarity index 100% rename from flatten-serde-json/src/main.rs rename to crates/flatten-serde-json/src/main.rs diff --git a/fuzzers/Cargo.toml b/crates/fuzzers/Cargo.toml similarity index 100% rename from fuzzers/Cargo.toml rename to crates/fuzzers/Cargo.toml diff --git a/fuzzers/README.md b/crates/fuzzers/README.md similarity index 100% rename from fuzzers/README.md rename to crates/fuzzers/README.md diff --git a/fuzzers/src/bin/fuzz-indexing.rs b/crates/fuzzers/src/bin/fuzz-indexing.rs similarity index 100% rename from fuzzers/src/bin/fuzz-indexing.rs rename to crates/fuzzers/src/bin/fuzz-indexing.rs diff --git a/fuzzers/src/lib.rs b/crates/fuzzers/src/lib.rs similarity index 100% rename from fuzzers/src/lib.rs rename to crates/fuzzers/src/lib.rs
diff --git a/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml
similarity index 92%
rename from index-scheduler/Cargo.toml
rename to crates/index-scheduler/Cargo.toml
index 88f9488b5..4a2913083 100644
--- a/index-scheduler/Cargo.toml
+++ b/crates/index-scheduler/Cargo.toml
@@ -42,7 +42,7 @@ uuid = { version = "1.10.0", features = ["serde", "v4"] }
 bumpalo = "3.16.0"

 [dev-dependencies]
-arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
+arroy = "0.5.0"
 big_s = "1.0.2"
 crossbeam = "0.8.4"
 insta = { version = "1.39.0", features = ["json", "redactions"] }
diff --git a/index-scheduler/src/autobatcher.rs b/crates/index-scheduler/src/autobatcher.rs similarity index 100% rename from index-scheduler/src/autobatcher.rs rename to crates/index-scheduler/src/autobatcher.rs diff --git a/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs similarity index 100% rename from index-scheduler/src/batch.rs rename to crates/index-scheduler/src/batch.rs
diff --git a/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs
similarity index 99%
rename from index-scheduler/src/error.rs
rename to crates/index-scheduler/src/error.rs
index 223b84762..3bd378fd6 100644
--- a/index-scheduler/src/error.rs
+++ b/crates/index-scheduler/src/error.rs
@@ -101,7 +101,7 @@ pub enum Error {
     )]
     InvalidTaskCanceledBy { canceled_by: String },
     #[error(
-        "{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_)."
+        "{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
     )]
     InvalidIndexUid { index_uid: String },
     #[error("Task `{0}` not found.")]
diff --git a/index-scheduler/src/features.rs b/crates/index-scheduler/src/features.rs similarity index 100% rename from index-scheduler/src/features.rs rename to crates/index-scheduler/src/features.rs diff --git a/index-scheduler/src/index_mapper/index_map.rs b/crates/index-scheduler/src/index_mapper/index_map.rs similarity index 100% rename from index-scheduler/src/index_mapper/index_map.rs rename to crates/index-scheduler/src/index_mapper/index_map.rs diff --git a/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs similarity index 100% rename from index-scheduler/src/index_mapper/mod.rs rename to crates/index-scheduler/src/index_mapper/mod.rs diff --git a/index-scheduler/src/insta_snapshot.rs b/crates/index-scheduler/src/insta_snapshot.rs similarity index 100% rename from index-scheduler/src/insta_snapshot.rs rename to crates/index-scheduler/src/insta_snapshot.rs
diff --git a/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs
similarity index 99%
rename from index-scheduler/src/lib.rs
rename to crates/index-scheduler/src/lib.rs
index 16b4a5897..9e78d4b48 100644
--- a/index-scheduler/src/lib.rs
+++ b/crates/index-scheduler/src/lib.rs
@@ -1281,7 +1281,7 @@ impl IndexScheduler {
                 #[cfg(test)]
                 self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?;

-                tracing::info!("Batch failed {}", error);
+                tracing::error!("Batch failed {}", error);

                 self.update_task(&mut wtxn, &task)
                     .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?;
diff --git a/index-scheduler/src/lru.rs b/crates/index-scheduler/src/lru.rs similarity index 100% rename from index-scheduler/src/lru.rs rename to crates/index-scheduler/src/lru.rs diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap b/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap similarity index 100% rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap rename to crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap b/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap similarity index 100% rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap rename to crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap
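Note on the lib.rs hunk above: the batch-failure log is promoted from tracing::info! to tracing::error!, so a failed batch is reported at a severity that log filters and alerting treat as a failure rather than as routine progress output. A minimal sketch of the idea (illustrative only, not code from this patch; the function name is hypothetical):

    use tracing::error;

    /// Sketch: report a failed batch at `error` level so it stands out from
    /// routine `info`-level progress logs.
    fn report_batch_failure(failure: &impl std::fmt::Display) {
        error!("Batch failed {}", failure);
    }
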
diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap b/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap similarity index 100% rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap rename to crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap b/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap similarity index 100% rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap rename to crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap b/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap similarity index 100% rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap rename to crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap b/crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap similarity index 100% rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap rename to crates/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap b/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap similarity index 100% rename from index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap rename to crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap b/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap similarity index 100% rename from index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap rename to crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap b/crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap similarity index 100% rename from index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap rename to crates/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap diff
--git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap similarity index 100% rename from 
index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap rename to 
crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap 
b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap 
b/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap diff --git 
a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap 
b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap diff --git 
a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap rename to crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap rename to crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap rename to crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap rename to crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap rename to crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap rename to crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial push.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial push.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial push.snap rename to crates/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after initial 
push.snap diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap rename to crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap b/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap rename to 
crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap diff --git 
a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap b/crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap rename to crates/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap b/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap rename to crates/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap diff --git 
a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap b/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap rename to crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap b/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap rename to crates/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap b/crates/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap rename to crates/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap diff --git a/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap b/crates/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap rename to crates/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap diff --git 
a/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap 
b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap rename to 
crates/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap rename to crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap similarity index 100% rename from 
index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_queue_is_full.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_deletion_have_not_been_enqueued.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_disable_auto_deletion_of_tasks/task_queue_is_full.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap similarity 
index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap 
b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap rename to 
crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap similarity index 100% rename from 
index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/after_registering_the_5_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap similarity index 100% rename 
from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/first_and_second_task_fails.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/third_task_succeeds.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/after_registering_the_3_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/only_first_task_succeed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap 
b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/after_registering_the_3_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/only_first_task_succeed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap similarity 
index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/after_registering_the_6_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/all_other_tasks_succeeds.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/first_task_fails.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/second_task_fails.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key/third_task_succeeds.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/after_registering_the_6_tasks.snap 
diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/first_task_succeed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap rename to 
crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_replace/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_update/documents.snap diff --git 
a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/documents.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap similarity index 100% rename from 
index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap diff --git a/index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap similarity index 100% rename from index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap rename to crates/index-scheduler/src/snapshots/lib.rs/test_task_is_processing/registered_a_task.snap diff --git a/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs similarity index 100% rename from index-scheduler/src/utils.rs rename to crates/index-scheduler/src/utils.rs diff --git a/index-scheduler/src/uuid_codec.rs b/crates/index-scheduler/src/uuid_codec.rs similarity index 100% rename from index-scheduler/src/uuid_codec.rs rename to crates/index-scheduler/src/uuid_codec.rs diff --git a/json-depth-checker/Cargo.toml b/crates/json-depth-checker/Cargo.toml similarity index 100% rename from json-depth-checker/Cargo.toml rename to crates/json-depth-checker/Cargo.toml diff --git a/json-depth-checker/benches/depth.rs b/crates/json-depth-checker/benches/depth.rs similarity index 100% rename from json-depth-checker/benches/depth.rs rename to crates/json-depth-checker/benches/depth.rs diff --git a/json-depth-checker/fuzz/Cargo.toml b/crates/json-depth-checker/fuzz/Cargo.toml similarity index 100% rename from json-depth-checker/fuzz/Cargo.toml rename to crates/json-depth-checker/fuzz/Cargo.toml diff --git a/json-depth-checker/fuzz/fuzz_targets/depth.rs b/crates/json-depth-checker/fuzz/fuzz_targets/depth.rs similarity index 100% rename from json-depth-checker/fuzz/fuzz_targets/depth.rs rename to crates/json-depth-checker/fuzz/fuzz_targets/depth.rs diff --git a/json-depth-checker/src/lib.rs b/crates/json-depth-checker/src/lib.rs similarity index 100% rename from json-depth-checker/src/lib.rs rename to crates/json-depth-checker/src/lib.rs diff --git a/meili-snap/Cargo.toml b/crates/meili-snap/Cargo.toml similarity index 81% rename from meili-snap/Cargo.toml rename to crates/meili-snap/Cargo.toml index e86feabd9..6c68e563c 100644
--- a/meili-snap/Cargo.toml
+++ b/crates/meili-snap/Cargo.toml
@@ -11,6 +11,6 @@ edition.workspace = true
 license.workspace = true
 
 [dependencies]
-insta = { version = "^1.39.0", features = ["json", "redactions"] }
+insta = { version = "=1.39.0", features = ["json", "redactions"] }
 md5 = "0.7.0"
 once_cell = "1.19"
diff --git a/meili-snap/src/lib.rs b/crates/meili-snap/src/lib.rs similarity index 
100% rename from meili-snap/src/lib.rs rename to crates/meili-snap/src/lib.rs diff --git a/meili-snap/src/snapshots/lib.rs/snap/4.snap b/crates/meili-snap/src/snapshots/lib.rs/snap/4.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/snap/4.snap rename to crates/meili-snap/src/snapshots/lib.rs/snap/4.snap diff --git a/meili-snap/src/snapshots/lib.rs/snap/5.snap b/crates/meili-snap/src/snapshots/lib.rs/snap/5.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/snap/5.snap rename to crates/meili-snap/src/snapshots/lib.rs/snap/5.snap diff --git a/meili-snap/src/snapshots/lib.rs/snap/6.snap b/crates/meili-snap/src/snapshots/lib.rs/snap/6.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/snap/6.snap rename to crates/meili-snap/src/snapshots/lib.rs/snap/6.snap diff --git a/meili-snap/src/snapshots/lib.rs/snap/7.snap b/crates/meili-snap/src/snapshots/lib.rs/snap/7.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/snap/7.snap rename to crates/meili-snap/src/snapshots/lib.rs/snap/7.snap diff --git a/meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap b/crates/meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap rename to crates/meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap diff --git a/meili-snap/src/snapshots/lib.rs/some_test/4.snap b/crates/meili-snap/src/snapshots/lib.rs/some_test/4.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/some_test/4.snap rename to crates/meili-snap/src/snapshots/lib.rs/some_test/4.snap diff --git a/meili-snap/src/snapshots/lib.rs/some_test/5.snap b/crates/meili-snap/src/snapshots/lib.rs/some_test/5.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/some_test/5.snap rename to crates/meili-snap/src/snapshots/lib.rs/some_test/5.snap diff --git a/meili-snap/src/snapshots/lib.rs/some_test/6.snap b/crates/meili-snap/src/snapshots/lib.rs/some_test/6.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/some_test/6.snap rename to crates/meili-snap/src/snapshots/lib.rs/some_test/6.snap diff --git a/meili-snap/src/snapshots/lib.rs/some_test/7.snap b/crates/meili-snap/src/snapshots/lib.rs/some_test/7.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/some_test/7.snap rename to crates/meili-snap/src/snapshots/lib.rs/some_test/7.snap diff --git a/meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap b/crates/meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap similarity index 100% rename from meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap rename to crates/meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap diff --git a/meilisearch-auth/Cargo.toml b/crates/meilisearch-auth/Cargo.toml similarity index 100% rename from meilisearch-auth/Cargo.toml rename to crates/meilisearch-auth/Cargo.toml diff --git a/meilisearch-auth/src/dump.rs b/crates/meilisearch-auth/src/dump.rs similarity index 100% rename from meilisearch-auth/src/dump.rs rename to crates/meilisearch-auth/src/dump.rs diff --git a/meilisearch-auth/src/error.rs b/crates/meilisearch-auth/src/error.rs similarity index 100% rename from meilisearch-auth/src/error.rs rename to crates/meilisearch-auth/src/error.rs diff --git a/meilisearch-auth/src/lib.rs b/crates/meilisearch-auth/src/lib.rs similarity index 100% rename from meilisearch-auth/src/lib.rs rename to crates/meilisearch-auth/src/lib.rs diff --git a/meilisearch-auth/src/store.rs 
b/crates/meilisearch-auth/src/store.rs similarity index 100% rename from meilisearch-auth/src/store.rs rename to crates/meilisearch-auth/src/store.rs diff --git a/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml similarity index 95% rename from meilisearch-types/Cargo.toml rename to crates/meilisearch-types/Cargo.toml index cb4937e57..0dae024f2 100644 --- a/meilisearch-types/Cargo.toml +++ b/crates/meilisearch-types/Cargo.toml @@ -66,5 +66,8 @@ khmer = ["milli/khmer"] vietnamese = ["milli/vietnamese"] # force swedish character recomposition swedish-recomposition = ["milli/swedish-recomposition"] -# force german character recomposition +# allow german tokenization german = ["milli/german"] +# allow turkish normalization +turkish = ["milli/turkish"] + diff --git a/meilisearch-types/src/compression.rs b/crates/meilisearch-types/src/compression.rs similarity index 100% rename from meilisearch-types/src/compression.rs rename to crates/meilisearch-types/src/compression.rs diff --git a/meilisearch-types/src/deserr/mod.rs b/crates/meilisearch-types/src/deserr/mod.rs similarity index 100% rename from meilisearch-types/src/deserr/mod.rs rename to crates/meilisearch-types/src/deserr/mod.rs diff --git a/meilisearch-types/src/deserr/query_params.rs b/crates/meilisearch-types/src/deserr/query_params.rs similarity index 100% rename from meilisearch-types/src/deserr/query_params.rs rename to crates/meilisearch-types/src/deserr/query_params.rs diff --git a/meilisearch-types/src/document_formats.rs b/crates/meilisearch-types/src/document_formats.rs similarity index 100% rename from meilisearch-types/src/document_formats.rs rename to crates/meilisearch-types/src/document_formats.rs diff --git a/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs similarity index 99% rename from meilisearch-types/src/error.rs rename to crates/meilisearch-types/src/error.rs index f755998a1..514ed18c3 100644 --- a/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -543,7 +543,8 @@ impl fmt::Display for deserr_codes::InvalidSimilarId { f, "the value of `id` is invalid. \ A document identifier can be of type integer or string, \ - only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)." + only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \ + and can not be more than 512 bytes." ) } } diff --git a/meilisearch-types/src/facet_values_sort.rs b/crates/meilisearch-types/src/facet_values_sort.rs similarity index 100% rename from meilisearch-types/src/facet_values_sort.rs rename to crates/meilisearch-types/src/facet_values_sort.rs diff --git a/meilisearch-types/src/features.rs b/crates/meilisearch-types/src/features.rs similarity index 100% rename from meilisearch-types/src/features.rs rename to crates/meilisearch-types/src/features.rs diff --git a/meilisearch-types/src/index_uid.rs b/crates/meilisearch-types/src/index_uid.rs similarity index 95% rename from meilisearch-types/src/index_uid.rs rename to crates/meilisearch-types/src/index_uid.rs index d64a6658d..03a31a82f 100644 --- a/meilisearch-types/src/index_uid.rs +++ b/crates/meilisearch-types/src/index_uid.rs @@ -88,7 +88,8 @@ impl fmt::Display for IndexUidFormatError { f, "`{}` is not a valid index uid. 
Index uid can be an \ integer or a string containing only alphanumeric \ - characters, hyphens (-) and underscores (_).", + characters, hyphens (-) and underscores (_), \ + and can not be more than 512 bytes.", self.invalid_uid, ) } diff --git a/meilisearch-types/src/index_uid_pattern.rs b/crates/meilisearch-types/src/index_uid_pattern.rs similarity index 100% rename from meilisearch-types/src/index_uid_pattern.rs rename to crates/meilisearch-types/src/index_uid_pattern.rs diff --git a/meilisearch-types/src/keys.rs b/crates/meilisearch-types/src/keys.rs similarity index 100% rename from meilisearch-types/src/keys.rs rename to crates/meilisearch-types/src/keys.rs diff --git a/meilisearch-types/src/lib.rs b/crates/meilisearch-types/src/lib.rs similarity index 100% rename from meilisearch-types/src/lib.rs rename to crates/meilisearch-types/src/lib.rs diff --git a/meilisearch-types/src/locales.rs b/crates/meilisearch-types/src/locales.rs similarity index 100% rename from meilisearch-types/src/locales.rs rename to crates/meilisearch-types/src/locales.rs diff --git a/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs similarity index 100% rename from meilisearch-types/src/settings.rs rename to crates/meilisearch-types/src/settings.rs diff --git a/meilisearch-types/src/star_or.rs b/crates/meilisearch-types/src/star_or.rs similarity index 100% rename from meilisearch-types/src/star_or.rs rename to crates/meilisearch-types/src/star_or.rs diff --git a/meilisearch-types/src/task_view.rs b/crates/meilisearch-types/src/task_view.rs similarity index 100% rename from meilisearch-types/src/task_view.rs rename to crates/meilisearch-types/src/task_view.rs diff --git a/meilisearch-types/src/tasks.rs b/crates/meilisearch-types/src/tasks.rs similarity index 100% rename from meilisearch-types/src/tasks.rs rename to crates/meilisearch-types/src/tasks.rs diff --git a/meilisearch-types/src/versioning.rs b/crates/meilisearch-types/src/versioning.rs similarity index 100% rename from meilisearch-types/src/versioning.rs rename to crates/meilisearch-types/src/versioning.rs diff --git a/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml similarity index 94% rename from meilisearch/Cargo.toml rename to crates/meilisearch/Cargo.toml index 177ab6ee2..b11d90151 100644 --- a/meilisearch/Cargo.toml +++ b/crates/meilisearch/Cargo.toml @@ -75,7 +75,7 @@ reqwest = { version = "0.12.5", features = [ rustls = { version = "0.23.11", features = ["ring"], default-features = false } rustls-pki-types = { version = "1.7.0", features = ["alloc"] } rustls-pemfile = "2.1.2" -segment = { version = "0.2.4", optional = true } +segment = { version = "0.2.4" } serde = { version = "1.0.204", features = ["derive"] } serde_json = { version = "1.0.120", features = ["preserve_order"] } sha2 = "0.10.8" @@ -104,6 +104,7 @@ tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-actix-web = "0.7.11" build-info = { version = "1.7.0", path = "../build-info" } roaring = "0.10.2" +mopa-maintained = "0.2.3" [dev-dependencies] actix-rt = "2.10.0" @@ -131,8 +132,7 @@ tempfile = { version = "3.10.1", optional = true } zip = { version = "2.1.3", optional = true } [features] -default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] -analytics = ["segment"] +default = ["meilisearch-types/all-tokenizations", "mini-dashboard"] mini-dashboard = [ "static-files", "anyhow", @@ -154,7 +154,8 @@ khmer = ["meilisearch-types/khmer"] vietnamese = ["meilisearch-types/vietnamese"] swedish-recomposition = 
["meilisearch-types/swedish-recomposition"] german = ["meilisearch-types/german"] +turkish = ["meilisearch-types/turkish"] [package.metadata.mini-dashboard] -assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" -sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe" +assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.15/build.zip" +sha1 = "d057600b4a839a2e0c0be7a372cd1b2683f3ca7e" diff --git a/meilisearch/build.rs b/crates/meilisearch/build.rs similarity index 100% rename from meilisearch/build.rs rename to crates/meilisearch/build.rs diff --git a/meilisearch/src/analytics/mock_analytics.rs b/crates/meilisearch/src/analytics/mock_analytics.rs similarity index 100% rename from meilisearch/src/analytics/mock_analytics.rs rename to crates/meilisearch/src/analytics/mock_analytics.rs diff --git a/crates/meilisearch/src/analytics/mod.rs b/crates/meilisearch/src/analytics/mod.rs new file mode 100644 index 000000000..bd14b0bfa --- /dev/null +++ b/crates/meilisearch/src/analytics/mod.rs @@ -0,0 +1,166 @@ +pub mod segment_analytics; + +use std::fs; +use std::path::{Path, PathBuf}; +use std::str::FromStr; +use std::sync::Arc; + +use actix_web::HttpRequest; +use index_scheduler::IndexScheduler; +use meilisearch_auth::AuthController; +use meilisearch_types::InstanceUid; +use mopa::mopafy; +use once_cell::sync::Lazy; +use platform_dirs::AppDirs; + +// if the feature analytics is enabled we use the real analytics +pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; + +use crate::Opt; + +/// A macro used to quickly define events that don't aggregate or send anything besides an empty event with its name. +#[macro_export] +macro_rules! empty_analytics { + ($struct_name:ident, $event_name:literal) => { + #[derive(Default)] + struct $struct_name {} + + impl $crate::analytics::Aggregate for $struct_name { + fn event_name(&self) -> &'static str { + $event_name + } + + fn aggregate(self: Box, _other: Box) -> Box { + self + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::json!({}) + } + } + }; +} + +/// The Meilisearch config dir: +/// `~/.config/Meilisearch` on *NIX or *BSD. +/// `~/Library/ApplicationSupport` on macOS. +/// `%APPDATA` (= `C:\Users%USERNAME%\AppData\Roaming`) on windows. +static MEILISEARCH_CONFIG_PATH: Lazy> = + Lazy::new(|| AppDirs::new(Some("Meilisearch"), false).map(|appdir| appdir.config_dir)); + +fn config_user_id_path(db_path: &Path) -> Option { + db_path + .canonicalize() + .ok() + .map(|path| path.join("instance-uid").display().to_string().replace('/', "-")) + .zip(MEILISEARCH_CONFIG_PATH.as_ref()) + .map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-'))) +} + +/// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid` +fn find_user_id(db_path: &Path) -> Option { + fs::read_to_string(db_path.join("instance-uid")) + .ok() + .or_else(|| fs::read_to_string(config_user_id_path(db_path)?).ok()) + .and_then(|uid| InstanceUid::from_str(&uid).ok()) +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum DocumentDeletionKind { + PerDocumentId, + ClearAll, + PerBatch, + PerFilter, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum DocumentFetchKind { + PerDocumentId { retrieve_vectors: bool }, + Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, +} + +/// To send an event to segment, your event must be able to aggregate itself with another event of the same type. 
+pub trait Aggregate: 'static + mopa::Any + Send { + /// The name of the event that will be sent to Segment. + fn event_name(&self) -> &'static str; + + /// Will be called every time two events of the same type are aggregated before Segment flushes its buffer. + fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> + where + Self: Sized; + + /// Converts your structure to the final event that'll be sent to Segment. + fn into_event(self: Box<Self>) -> serde_json::Value; +} + +mopafy!(Aggregate); + +/// Helper trait to define multiple aggregates with the same content but a different name. +/// Commonly used when you must aggregate a search with POST or with GET, for example. +pub trait AggregateMethod: 'static + Default + Send { + fn event_name() -> &'static str; +} + +/// A macro used to quickly define multiple aggregate methods with their names +/// Usage: +/// ```rust +/// use meilisearch::aggregate_methods; +/// +/// aggregate_methods!( +/// SearchGET => "Documents Searched GET", +/// SearchPOST => "Documents Searched POST", +/// ); +/// ``` +#[macro_export] +macro_rules! aggregate_methods { + ($method:ident => $event_name:literal) => { + #[derive(Default)] + pub struct $method {} + + impl $crate::analytics::AggregateMethod for $method { + fn event_name() -> &'static str { + $event_name + } + } + }; + ($($method:ident => $event_name:literal,)+) => { + $( + aggregate_methods!($method => $event_name); + )+ + + }; +} + +#[derive(Clone)] +pub struct Analytics { + segment: Option<Arc<SegmentAnalytics>>, +} + +impl Analytics { + pub async fn new( + opt: &Opt, + index_scheduler: Arc<IndexScheduler>, + auth_controller: Arc<AuthController>, + ) -> Self { + if opt.no_analytics { + Self { segment: None } + } else { + Self { segment: SegmentAnalytics::new(opt, index_scheduler, auth_controller).await } + } + } + + pub fn no_analytics() -> Self { + Self { segment: None } + } + + pub fn instance_uid(&self) -> Option<&InstanceUid> { + self.segment.as_ref().map(|segment| segment.instance_uid.as_ref()) + } + + /// The method used to publish most analytics that do not need to be batched every hour + pub fn publish<T: Aggregate>(&self, event: T, request: &HttpRequest) { + if let Some(ref segment) = self.segment { + let _ = segment.sender.try_send(segment_analytics::Message::new(event, request)); + } + } +} diff --git a/crates/meilisearch/src/analytics/segment_analytics.rs b/crates/meilisearch/src/analytics/segment_analytics.rs new file mode 100644 index 000000000..7dc746b14 --- /dev/null +++ b/crates/meilisearch/src/analytics/segment_analytics.rs @@ -0,0 +1,484 @@ +use std::any::TypeId; +use std::collections::{HashMap, HashSet}; +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use actix_web::http::header::USER_AGENT; +use actix_web::HttpRequest; +use byte_unit::Byte; +use index_scheduler::IndexScheduler; +use meilisearch_auth::{AuthController, AuthFilter}; +use meilisearch_types::features::RuntimeTogglableFeatures; +use meilisearch_types::InstanceUid; +use once_cell::sync::Lazy; +use segment::message::{Identify, Track, User}; +use segment::{AutoBatcher, Batcher, HttpClient}; +use serde::Serialize; +use serde_json::{json, Value}; +use sysinfo::{Disks, System}; +use time::OffsetDateTime; +use tokio::select; +use tokio::sync::mpsc::{self, Receiver, Sender}; +use uuid::Uuid; + +use super::{config_user_id_path, Aggregate, MEILISEARCH_CONFIG_PATH}; +use crate::option::{ + default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot, +}; +use crate::routes::{create_all_stats, Stats}; +use crate::Opt; + +const ANALYTICS_HEADER: &str =
"X-Meilisearch-Client"; + +/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. +fn write_user_id(db_path: &Path, user_id: &InstanceUid) { + let _ = fs::write(db_path.join("instance-uid"), user_id.to_string()); + if let Some((meilisearch_config_path, user_id_path)) = + MEILISEARCH_CONFIG_PATH.as_ref().zip(config_user_id_path(db_path)) + { + let _ = fs::create_dir_all(meilisearch_config_path); + let _ = fs::write(user_id_path, user_id.to_string()); + } +} + +const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb"; + +pub fn extract_user_agents(request: &HttpRequest) -> HashSet { + request + .headers() + .get(ANALYTICS_HEADER) + .or_else(|| request.headers().get(USER_AGENT)) + .and_then(|header| header.to_str().ok()) + .unwrap_or("unknown") + .split(';') + .map(str::trim) + .map(ToString::to_string) + .collect() +} + +pub struct Message { + // Since the type_id is solved statically we cannot retrieve it from the Box. + // Thus we have to send it in the message directly. + type_id: TypeId, + // Same for the aggregate function. + #[allow(clippy::type_complexity)] + aggregator_function: fn(Box, Box) -> Option>, + event: Event, +} + +pub struct Event { + original: Box, + timestamp: OffsetDateTime, + user_agents: HashSet, + total: usize, +} + +/// This function should always be called on the same type. If `this` and `other` +/// aren't the same type the function will do nothing and return `None`. +fn downcast_aggregate( + old: Box, + new: Box, +) -> Option> { + if old.is::() && new.is::() { + // Both the two following lines cannot fail, but just to be sure we don't crash, we're still avoiding unwrapping + let this = old.downcast::().ok()?; + let other = new.downcast::().ok()?; + Some(ConcreteType::aggregate(this, other)) + } else { + None + } +} + +impl Message { + pub fn new(event: T, request: &HttpRequest) -> Self { + Self { + type_id: TypeId::of::(), + event: Event { + original: Box::new(event), + timestamp: OffsetDateTime::now_utc(), + user_agents: extract_user_agents(request), + total: 1, + }, + aggregator_function: downcast_aggregate::, + } + } +} + +pub struct SegmentAnalytics { + pub instance_uid: InstanceUid, + pub user: User, + pub sender: Sender, +} + +impl SegmentAnalytics { + #[allow(clippy::new_ret_no_self)] + pub async fn new( + opt: &Opt, + index_scheduler: Arc, + auth_controller: Arc, + ) -> Option> { + let instance_uid = super::find_user_id(&opt.db_path); + let first_time_run = instance_uid.is_none(); + let instance_uid = instance_uid.unwrap_or_else(Uuid::new_v4); + write_user_id(&opt.db_path, &instance_uid); + + let client = reqwest::Client::builder().connect_timeout(Duration::from_secs(10)).build(); + + // if reqwest throws an error we won't be able to send analytics + if client.is_err() { + return None; + } + + let client = + HttpClient::new(client.unwrap(), "https://telemetry.meilisearch.com".to_string()); + let user = User::UserId { user_id: instance_uid.to_string() }; + let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string()); + + // If Meilisearch is Launched for the first time: + // 1. Send an event Launched associated to the user `total_launch`. + // 2. Batch an event Launched with the real instance-id and send it in one hour. 
+ if first_time_run { + let _ = batcher + .push(Track { + user: User::UserId { user_id: "total_launch".to_string() }, + event: "Launched".to_string(), + ..Default::default() + }) + .await; + let _ = batcher.flush().await; + let _ = batcher + .push(Track { + user: user.clone(), + event: "Launched".to_string(), + ..Default::default() + }) + .await; + } + + let (sender, inbox) = mpsc::channel(100); // How many analytics events we can buffer + + let segment = Box::new(Segment { + inbox, + user: user.clone(), + opt: opt.clone(), + batcher, + events: HashMap::new(), + }); + tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone())); + + let this = Self { instance_uid, sender, user: user.clone() }; + + Some(Arc::new(this)) + } +} + +/// This structure represents the `infos` field we send in the analytics. +/// It's quite close to the `Opt` structure except all sensitive information +/// has been simplified to a boolean. +/// It's sent as-is to Amplitude, so you should never rename a field of the +/// struct without the approval of the PM. +#[derive(Debug, Clone, Serialize)] +struct Infos { + env: String, + experimental_contains_filter: bool, + experimental_vector_store: bool, + experimental_enable_metrics: bool, + experimental_edit_documents_by_function: bool, + experimental_search_queue_size: usize, + experimental_drop_search_after: usize, + experimental_nb_searches_per_core: usize, + experimental_logs_mode: LogMode, + experimental_replication_parameters: bool, + experimental_enable_logs_route: bool, + experimental_reduce_indexing_memory_usage: bool, + experimental_max_number_of_batched_tasks: usize, + gpu_enabled: bool, + db_path: bool, + import_dump: bool, + dump_dir: bool, + ignore_missing_dump: bool, + ignore_dump_if_db_exists: bool, + import_snapshot: bool, + schedule_snapshot: Option<u64>, + snapshot_dir: bool, + ignore_missing_snapshot: bool, + ignore_snapshot_if_db_exists: bool, + http_addr: bool, + http_payload_size_limit: Byte, + task_queue_webhook: bool, + task_webhook_authorization_header: bool, + log_level: String, + max_indexing_memory: MaxMemory, + max_indexing_threads: MaxThreads, + with_configuration_file: bool, + ssl_auth_path: bool, + ssl_cert_path: bool, + ssl_key_path: bool, + ssl_ocsp_path: bool, + ssl_require_auth: bool, + ssl_resumption: bool, + ssl_tickets: bool, +} + +impl Infos { + pub fn new(options: Opt, features: RuntimeTogglableFeatures) -> Self { + // We want to decompose this whole struct by hand to be sure we don't forget + // to add analytics when we add a field in the Opt. + // Thus we must not insert `..` at the end.
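The comment above is a compile-time safety net worth spelling out: destructuring without `..` forces a build failure whenever a field is added to `Opt`, so the analytics cannot silently drift out of sync. A toy version of the guard, with a hypothetical two-field struct:

struct Options {
    verbose: bool,
    dry_run: bool,
}

fn count_enabled(o: Options) -> u8 {
    // Exhaustive destructuring: adding a third field to `Options` makes this
    // pattern a compile error until the new field is explicitly handled.
    let Options { verbose, dry_run } = o;
    verbose as u8 + dry_run as u8
}

fn main() {
    assert_eq!(count_enabled(Options { verbose: true, dry_run: false }), 1);
}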
+ let Opt { + db_path, + experimental_contains_filter, + experimental_enable_metrics, + experimental_search_queue_size, + experimental_drop_search_after, + experimental_nb_searches_per_core, + experimental_logs_mode, + experimental_replication_parameters, + experimental_enable_logs_route, + experimental_reduce_indexing_memory_usage, + experimental_max_number_of_batched_tasks, + http_addr, + master_key: _, + env, + task_webhook_url, + task_webhook_authorization_header, + max_index_size: _, + max_task_db_size: _, + http_payload_size_limit, + ssl_cert_path, + ssl_key_path, + ssl_auth_path, + ssl_ocsp_path, + ssl_require_auth, + ssl_resumption, + ssl_tickets, + import_snapshot, + ignore_missing_snapshot, + ignore_snapshot_if_db_exists, + snapshot_dir, + schedule_snapshot, + import_dump, + ignore_missing_dump, + ignore_dump_if_db_exists, + dump_dir, + log_level, + indexer_options, + config_file_path, + no_analytics: _, + } = options; + + let schedule_snapshot = match schedule_snapshot { + ScheduleSnapshot::Disabled => None, + ScheduleSnapshot::Enabled(interval) => Some(interval), + }; + + let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = + indexer_options; + + let RuntimeTogglableFeatures { + vector_store, + metrics, + logs_route, + edit_documents_by_function, + contains_filter, + } = features; + + // We're going to override every sensitive piece of information. + // We consider information sensitive if it contains a path, an address, or a key. + Self { + env, + experimental_contains_filter: experimental_contains_filter | contains_filter, + experimental_vector_store: vector_store, + experimental_edit_documents_by_function: edit_documents_by_function, + experimental_enable_metrics: experimental_enable_metrics | metrics, + experimental_search_queue_size, + experimental_drop_search_after: experimental_drop_search_after.into(), + experimental_nb_searches_per_core: experimental_nb_searches_per_core.into(), + experimental_logs_mode, + experimental_replication_parameters, + experimental_enable_logs_route: experimental_enable_logs_route | logs_route, + experimental_reduce_indexing_memory_usage, + gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), + db_path: db_path != PathBuf::from("./data.ms"), + import_dump: import_dump.is_some(), + dump_dir: dump_dir != PathBuf::from("dumps/"), + ignore_missing_dump, + ignore_dump_if_db_exists, + import_snapshot: import_snapshot.is_some(), + schedule_snapshot, + snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"), + ignore_missing_snapshot, + ignore_snapshot_if_db_exists, + http_addr: http_addr != default_http_addr(), + http_payload_size_limit, + experimental_max_number_of_batched_tasks, + task_queue_webhook: task_webhook_url.is_some(), + task_webhook_authorization_header: task_webhook_authorization_header.is_some(), + log_level: log_level.to_string(), + max_indexing_memory, + max_indexing_threads, + with_configuration_file: config_file_path.is_some(), + ssl_auth_path: ssl_auth_path.is_some(), + ssl_cert_path: ssl_cert_path.is_some(), + ssl_key_path: ssl_key_path.is_some(), + ssl_ocsp_path: ssl_ocsp_path.is_some(), + ssl_require_auth, + ssl_resumption, + ssl_tickets, + } + } +} + +pub struct Segment { + inbox: Receiver<Message>, + user: User, + opt: Opt, + batcher: AutoBatcher, + events: HashMap<TypeId, Event>, +} + +impl Segment { + fn compute_traits(opt: &Opt, stats: Stats, features: RuntimeTogglableFeatures) -> Value { + static FIRST_START_TIMESTAMP: Lazy<Instant> = Lazy::new(Instant::now); + static SYSTEM: Lazy<Value> = Lazy::new(|| { + let disks =
Disks::new_with_refreshed_list(); + let mut sys = System::new_all(); + sys.refresh_all(); + let kernel_version = System::kernel_version() + .and_then(|k| k.split_once('-').map(|(k, _)| k.to_string())); + json!({ + "distribution": System::name(), + "kernel_version": kernel_version, + "cores": sys.cpus().len(), + "ram_size": sys.total_memory(), + "disk_size": disks.iter().map(|disk| disk.total_space()).max(), + "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), + }) + }); + let number_of_documents = + stats.indexes.values().map(|index| index.number_of_documents).collect::<Vec<u64>>(); + + json!({ + "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day + "system": *SYSTEM, + "stats": { + "database_size": stats.database_size, + "indexes_number": stats.indexes.len(), + "documents_number": number_of_documents, + }, + "infos": Infos::new(opt.clone(), features), + }) + } + + async fn run( + mut self, + index_scheduler: Arc<IndexScheduler>, + auth_controller: Arc<AuthController>, + ) { + const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour + // The first batch must be sent after one hour. + let mut interval = + tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL); + + loop { + select! { + _ = interval.tick() => { + self.tick(index_scheduler.clone(), auth_controller.clone()).await; + }, + Some(msg) = self.inbox.recv() => { + self.handle_msg(msg); + } + } + } + } + + fn handle_msg(&mut self, Message { type_id, aggregator_function, event }: Message) { + let new_event = match self.events.remove(&type_id) { + Some(old) => { + // The function should never fail since we retrieved the corresponding TypeId in the map. But in the unfortunate + // case it could happen, we're going to silently ignore the error + let Some(original) = (aggregator_function)(old.original, event.original) else { + return; + }; + Event { + original, + // We always want to return the FIRST timestamp ever encountered + timestamp: old.timestamp, + user_agents: old.user_agents.union(&event.user_agents).cloned().collect(), + total: old.total.saturating_add(event.total), + } + } + None => event, + }; + self.events.insert(type_id, new_event); + } + + async fn tick( + &mut self, + index_scheduler: Arc<IndexScheduler>, + auth_controller: Arc<AuthController>, + ) { + if let Ok(stats) = create_all_stats( + index_scheduler.clone().into(), + auth_controller.into(), + &AuthFilter::default(), + ) { + // Replace the version number with the prototype name if any.
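The `run` loop just above follows the usual tokio actor shape: one mpsc inbox for events and one timer for the hourly flush, multiplexed with `select!`. A self-contained sketch of that shape, assuming only the tokio crate with its `full` feature; the printing stands in for the real aggregation:

use std::time::Duration;
use tokio::sync::mpsc;
use tokio::{select, time};

#[tokio::main]
async fn main() {
    let (tx, mut inbox) = mpsc::channel::<String>(100);
    tx.send("event".to_string()).await.unwrap();
    drop(tx); // close the channel so the sketch terminates

    let mut interval = time::interval(Duration::from_millis(50));
    loop {
        select! {
            _ = interval.tick() => {
                // Periodic work: the real code flushes aggregated events here.
            }
            msg = inbox.recv() => match msg {
                Some(msg) => println!("aggregating {msg}"), // handle_msg in the real code
                None => break, // all senders are gone
            },
        }
    }
}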
+ let version = if let Some(prototype) = build_info::DescribeResult::from_build() + .and_then(|describe| describe.as_prototype()) + { + prototype + } else { + env!("CARGO_PKG_VERSION") + }; + + let _ = self + .batcher + .push(Identify { + context: Some(json!({ + "app": { + "version": version.to_string(), + }, + })), + user: self.user.clone(), + traits: Self::compute_traits( + &self.opt, + stats, + index_scheduler.features().runtime_features(), + ), + ..Default::default() + }) + .await; + } + + // We empty the list of events + let events = std::mem::take(&mut self.events); + + for (_, event) in events { + let Event { original, timestamp, user_agents, total } = event; + let name = original.event_name(); + let mut properties = original.into_event(); + if properties["user-agent"].is_null() { + properties["user-agent"] = json!(user_agents); + }; + if properties["requests"]["total_received"].is_null() { + properties["requests"]["total_received"] = total.into(); + }; + + let _ = self + .batcher + .push(Track { + user: self.user.clone(), + event: name.to_string(), + properties, + timestamp: Some(timestamp), + ..Default::default() + }) + .await; + } + + let _ = self.batcher.flush().await; + } +} diff --git a/meilisearch/src/error.rs b/crates/meilisearch/src/error.rs similarity index 100% rename from meilisearch/src/error.rs rename to crates/meilisearch/src/error.rs diff --git a/meilisearch/src/extractors/authentication/error.rs b/crates/meilisearch/src/extractors/authentication/error.rs similarity index 100% rename from meilisearch/src/extractors/authentication/error.rs rename to crates/meilisearch/src/extractors/authentication/error.rs diff --git a/meilisearch/src/extractors/authentication/mod.rs b/crates/meilisearch/src/extractors/authentication/mod.rs similarity index 100% rename from meilisearch/src/extractors/authentication/mod.rs rename to crates/meilisearch/src/extractors/authentication/mod.rs diff --git a/meilisearch/src/extractors/mod.rs b/crates/meilisearch/src/extractors/mod.rs similarity index 100% rename from meilisearch/src/extractors/mod.rs rename to crates/meilisearch/src/extractors/mod.rs diff --git a/meilisearch/src/extractors/payload.rs b/crates/meilisearch/src/extractors/payload.rs similarity index 100% rename from meilisearch/src/extractors/payload.rs rename to crates/meilisearch/src/extractors/payload.rs diff --git a/meilisearch/src/extractors/sequential_extractor.rs b/crates/meilisearch/src/extractors/sequential_extractor.rs similarity index 100% rename from meilisearch/src/extractors/sequential_extractor.rs rename to crates/meilisearch/src/extractors/sequential_extractor.rs diff --git a/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs similarity index 99% rename from meilisearch/src/lib.rs rename to crates/meilisearch/src/lib.rs index b24f18fae..633ad2776 100644 --- a/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -120,7 +120,7 @@ pub fn create_app( search_queue: Data, opt: Opt, logs: (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Data, enable_dashboard: bool, ) -> actix_web::App< impl ServiceFactory< @@ -473,14 +473,14 @@ pub fn configure_data( search_queue: Data, opt: &Opt, (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Data, ) { let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; config .app_data(index_scheduler) .app_data(auth) .app_data(search_queue) - .app_data(web::Data::from(analytics)) + .app_data(analytics) .app_data(web::Data::new(logs_route)) 
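Context for the `Data<Analytics>` signature change in this hunk: `actix_web::web::Data<T>` is an `Arc<T>` under the hood, so building the `Data` once from an existing `Arc` and cloning it into the app factory avoids re-wrapping on every call. A minimal sketch of the two constructions; the `Analytics` placeholder here is illustrative, the `Data` API is real actix-web:

use std::sync::Arc;
use actix_web::web::Data;

struct Analytics;

fn main() {
    let arc = Arc::new(Analytics);
    // Reuses the existing allocation: Data<T> implements From<Arc<T>>.
    let shared: Data<Analytics> = Data::from(arc);
    // Wraps its argument in a brand-new Arc instead.
    let fresh: Data<Analytics> = Data::new(Analytics);
    let _ = (shared, fresh);
}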
.app_data(web::Data::new(logs_stderr)) .app_data(web::Data::new(opt.clone())) diff --git a/meilisearch/src/main.rs b/crates/meilisearch/src/main.rs similarity index 94% rename from meilisearch/src/main.rs rename to crates/meilisearch/src/main.rs index b66bfc5b8..c0652bf1e 100644 --- a/meilisearch/src/main.rs +++ b/crates/meilisearch/src/main.rs @@ -5,6 +5,7 @@ use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; use std::thread::available_parallelism; +use std::time::Duration; use actix_web::http::KeepAlive; use actix_web::web::Data; @@ -123,19 +124,12 @@ async fn try_main() -> anyhow::Result<()> { let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?; - #[cfg(all(not(debug_assertions), feature = "analytics"))] - let analytics = if !opt.no_analytics { - analytics::SegmentAnalytics::new(&opt, index_scheduler.clone(), auth_controller.clone()) - .await - } else { - analytics::MockAnalytics::new(&opt) - }; - #[cfg(any(debug_assertions, not(feature = "analytics")))] - let analytics = analytics::MockAnalytics::new(&opt); + let analytics = + analytics::Analytics::new(&opt, index_scheduler.clone(), auth_controller.clone()).await; print_launch_resume(&opt, analytics.clone(), config_read_from); - run_http(index_scheduler, auth_controller, opt, log_handle, analytics).await?; + run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?; Ok(()) } @@ -145,16 +139,23 @@ async fn run_http( auth_controller: Arc, opt: Opt, logs: (LogRouteHandle, LogStderrHandle), - analytics: Arc, + analytics: Arc, ) -> anyhow::Result<()> { let enable_dashboard = &opt.env == "development"; let opt_clone = opt.clone(); let index_scheduler = Data::from(index_scheduler); let auth_controller = Data::from(auth_controller); + let analytics = Data::from(analytics); let search_queue = SearchQueue::new( opt.experimental_search_queue_size, - available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()), - ); + available_parallelism() + .unwrap_or(NonZeroUsize::new(2).unwrap()) + .checked_mul(opt.experimental_nb_searches_per_core) + .unwrap_or(NonZeroUsize::MAX), + ) + .with_time_to_abort(Duration::from_secs( + usize::from(opt.experimental_drop_search_after) as u64 + )); let search_queue = Data::new(search_queue); let http_server = HttpServer::new(move || { @@ -180,11 +181,7 @@ async fn run_http( Ok(()) } -pub fn print_launch_resume( - opt: &Opt, - analytics: Arc, - config_read_from: Option, -) { +pub fn print_launch_resume(opt: &Opt, analytics: Analytics, config_read_from: Option) { let build_info = build_info::BuildInfo::from_build(); let protocol = @@ -226,7 +223,6 @@ pub fn print_launch_resume( eprintln!("Prototype:\t\t{:?}", prototype); } - #[cfg(all(not(debug_assertions), feature = "analytics"))] { if !opt.no_analytics { eprintln!( diff --git a/meilisearch/src/metrics.rs b/crates/meilisearch/src/metrics.rs similarity index 100% rename from meilisearch/src/metrics.rs rename to crates/meilisearch/src/metrics.rs diff --git a/meilisearch/src/middleware.rs b/crates/meilisearch/src/middleware.rs similarity index 100% rename from meilisearch/src/middleware.rs rename to crates/meilisearch/src/middleware.rs diff --git a/meilisearch/src/option.rs b/crates/meilisearch/src/option.rs similarity index 94% rename from meilisearch/src/option.rs rename to crates/meilisearch/src/option.rs index 3799bdcb7..7e87a5a2c 100644 --- a/meilisearch/src/option.rs +++ b/crates/meilisearch/src/option.rs @@ -2,7 +2,7 @@ use std::env::VarError; use std::ffi::OsStr; use std::fmt::Display; use 
std::io::{BufReader, Read}; -use std::num::ParseIntError; +use std::num::{NonZeroUsize, ParseIntError}; use std::ops::Deref; use std::path::PathBuf; use std::str::FromStr; @@ -29,7 +29,6 @@ const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY"; const MEILI_ENV: &str = "MEILI_ENV"; const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL"; const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER"; -#[cfg(feature = "analytics")] const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS"; const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT"; const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH"; @@ -55,6 +54,8 @@ const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LO const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE"; +const MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER: &str = "MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER"; +const MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE: &str = "MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str = @@ -208,7 +209,6 @@ pub struct Opt { /// Meilisearch automatically collects data from all instances that do not opt out using this flag. /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted /// at any time. - #[cfg(feature = "analytics")] #[serde(default)] // we can't send true #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, @@ -357,10 +357,26 @@ pub struct Opt { /// Lets you customize the size of the search queue. Meilisearch processes your search requests as fast as possible but once the /// queue is full it starts returning HTTP 503, Service Unavailable. /// The default value is 1000. - #[clap(long, env = MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, default_value_t = 1000)] - #[serde(default)] + #[clap(long, env = MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, default_value_t = default_experimental_search_queue_size())] + #[serde(default = "default_experimental_search_queue_size")] pub experimental_search_queue_size: usize, + /// Experimental drop search after. For more information, see: + /// + /// Lets you customize after how many seconds Meilisearch should consider a search request irrelevant and drop it. + /// The default value is 60. + #[clap(long, env = MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER, default_value_t = default_drop_search_after())] + #[serde(default = "default_drop_search_after")] + pub experimental_drop_search_after: NonZeroUsize, + + /// Experimental number of searches per core. For more information, see: + /// + /// Lets you customize how many search requests can run on each core concurrently. + /// The default value is 4. + #[clap(long, env = MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE, default_value_t = default_nb_searches_per_core())] + #[serde(default = "default_nb_searches_per_core")] + pub experimental_nb_searches_per_core: NonZeroUsize, + /// Experimental logs mode feature. For more information, see: /// /// Change the mode of the logs on the console. @@ -407,7 +423,6 @@ pub struct Opt { impl Opt { /// Whether analytics should be enabled or not.
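How the two new options interact, per the `run_http` change earlier in this diff: the search queue capacity is `available_parallelism() × experimental_nb_searches_per_core`, saturating at `NonZeroUsize::MAX` instead of overflowing. The arithmetic in isolation, using only the standard library:

use std::num::NonZeroUsize;
use std::thread::available_parallelism;

fn queue_capacity(searches_per_core: NonZeroUsize) -> NonZeroUsize {
    available_parallelism()
        .unwrap_or(NonZeroUsize::new(2).unwrap()) // same fallback as the diff
        .checked_mul(searches_per_core)
        .unwrap_or(NonZeroUsize::MAX) // saturate rather than overflow
}

fn main() {
    // With the default of 4 searches per core, an 8-core machine gets 32 slots.
    println!("capacity: {}", queue_capacity(NonZeroUsize::new(4).unwrap()));
}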
- #[cfg(all(not(debug_assertions), feature = "analytics"))] pub fn analytics(&self) -> bool { !self.no_analytics } @@ -487,11 +502,12 @@ impl Opt { ignore_missing_dump: _, ignore_dump_if_db_exists: _, config_file_path: _, - #[cfg(feature = "analytics")] no_analytics, experimental_contains_filter, experimental_enable_metrics, experimental_search_queue_size, + experimental_drop_search_after, + experimental_nb_searches_per_core, experimental_logs_mode, experimental_enable_logs_route, experimental_replication_parameters, @@ -513,10 +529,7 @@ impl Opt { ); } - #[cfg(feature = "analytics")] - { - export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); - } + export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); export_to_env_if_not_present( MEILI_HTTP_PAYLOAD_SIZE_LIMIT, http_payload_size_limit.to_string(), @@ -559,6 +572,14 @@ impl Opt { MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE, experimental_search_queue_size.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_DROP_SEARCH_AFTER, + experimental_drop_search_after.to_string(), + ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_NB_SEARCHES_PER_CORE, + experimental_nb_searches_per_core.to_string(), + ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_LOGS_MODE, experimental_logs_mode.to_string(), @@ -890,6 +911,18 @@ fn default_dump_dir() -> PathBuf { PathBuf::from(DEFAULT_DUMP_DIR) } +fn default_experimental_search_queue_size() -> usize { + 1000 +} + +fn default_drop_search_after() -> NonZeroUsize { + NonZeroUsize::new(60).unwrap() +} + +fn default_nb_searches_per_core() -> NonZeroUsize { + NonZeroUsize::new(4).unwrap() +} + /// Indicates if a snapshot was scheduled, and if yes with which interval. #[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)] pub enum ScheduleSnapshot { diff --git a/meilisearch/src/routes/api_key.rs b/crates/meilisearch/src/routes/api_key.rs similarity index 100% rename from meilisearch/src/routes/api_key.rs rename to crates/meilisearch/src/routes/api_key.rs diff --git a/meilisearch/src/routes/dump.rs b/crates/meilisearch/src/routes/dump.rs similarity index 90% rename from meilisearch/src/routes/dump.rs rename to crates/meilisearch/src/routes/dump.rs index 7f3cd06a5..c78dc4dad 100644 --- a/meilisearch/src/routes/dump.rs +++ b/crates/meilisearch/src/routes/dump.rs @@ -4,7 +4,6 @@ use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; -use serde_json::json; use tracing::debug; use crate::analytics::Analytics; @@ -18,14 +17,16 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); } +crate::empty_analytics!(DumpAnalytics, "Dump Created"); + pub async fn create_dump( index_scheduler: GuardedData, Data>, auth_controller: GuardedData, Data>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { - analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); + analytics.publish(DumpAnalytics::default(), &req); let task = KindWithContent::DumpCreation { keys: auth_controller.list_keys()?, diff --git a/meilisearch/src/routes/features.rs b/crates/meilisearch/src/routes/features.rs similarity index 73% rename from meilisearch/src/routes/features.rs rename to crates/meilisearch/src/routes/features.rs index bc656bdbb..5d93adc02 100644 --- a/meilisearch/src/routes/features.rs +++ b/crates/meilisearch/src/routes/features.rs @@ -6,10 
+6,10 @@ use index_scheduler::IndexScheduler; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::actions; -use serde_json::json; +use serde::Serialize; use tracing::debug; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; @@ -17,7 +17,7 @@ use crate::extractors::sequential_extractor::SeqHandler; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( web::resource("") - .route(web::get().to(SeqHandler(get_features))) + .route(web::get().to(get_features)) .route(web::patch().to(SeqHandler(patch_features))), ); } @@ -27,12 +27,9 @@ async fn get_features( ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_GET }>, Data, >, - req: HttpRequest, - analytics: Data, ) -> HttpResponse { let features = index_scheduler.features(); - analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req)); let features = features.runtime_features(); debug!(returns = ?features, "Get features"); HttpResponse::Ok().json(features) @@ -53,6 +50,35 @@ pub struct RuntimeTogglableFeatures { pub contains_filter: Option, } +#[derive(Serialize)] +pub struct PatchExperimentalFeatureAnalytics { + vector_store: bool, + metrics: bool, + logs_route: bool, + edit_documents_by_function: bool, + contains_filter: bool, +} + +impl Aggregate for PatchExperimentalFeatureAnalytics { + fn event_name(&self) -> &'static str { + "Experimental features Updated" + } + + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { + vector_store: new.vector_store, + metrics: new.metrics, + logs_route: new.logs_route, + edit_documents_by_function: new.edit_documents_by_function, + contains_filter: new.contains_filter, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + async fn patch_features( index_scheduler: GuardedData< ActionPolicy<{ actions::EXPERIMENTAL_FEATURES_UPDATE }>, @@ -60,7 +86,7 @@ async fn patch_features( >, new_features: AwebJson, req: HttpRequest, - analytics: Data, + analytics: Data, ) -> Result { let features = index_scheduler.features(); debug!(parameters = ?new_features, "Patch features"); @@ -89,15 +115,14 @@ async fn patch_features( } = new_features; analytics.publish( - "Experimental features Updated".to_string(), - json!({ - "vector_store": vector_store, - "metrics": metrics, - "logs_route": logs_route, - "edit_documents_by_function": edit_documents_by_function, - "contains_filter": contains_filter, - }), - Some(&req), + PatchExperimentalFeatureAnalytics { + vector_store, + metrics, + logs_route, + edit_documents_by_function, + contains_filter, + }, + &req, ); index_scheduler.put_runtime_features(new_features)?; debug!(returns = ?new_features, "Patch features"); diff --git a/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs similarity index 76% rename from meilisearch/src/routes/indexes/documents.rs rename to crates/meilisearch/src/routes/indexes/documents.rs index 85cf33c54..47f73ef42 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/crates/meilisearch/src/routes/indexes/documents.rs @@ -1,4 +1,6 @@ +use std::collections::HashSet; use std::io::ErrorKind; +use std::marker::PhantomData; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; @@ -23,14 +25,14 @@ use 
meilisearch_types::tasks::KindWithContent; use meilisearch_types::{milli, Document, Index}; use mime::Mime; use once_cell::sync::Lazy; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; use tracing::debug; -use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; +use crate::analytics::{Aggregate, AggregateMethod, Analytics}; use crate::error::MeilisearchHttpError; use crate::error::PayloadError::ReceivePayload; use crate::extractors::authentication::policies::*; @@ -41,7 +43,7 @@ use crate::routes::{ get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT, }; use crate::search::{parse_filter, RetrieveVectors}; -use crate::Opt; +use crate::{aggregate_methods, Opt}; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()] @@ -100,12 +102,84 @@ pub struct GetDocument { retrieve_vectors: Param, } +aggregate_methods!( + DocumentsGET => "Documents Fetched GET", + DocumentsPOST => "Documents Fetched POST", +); + +#[derive(Serialize)] +pub struct DocumentsFetchAggregator { + // a call on ../documents/:doc_id + per_document_id: bool, + // if a filter was used + per_filter: bool, + + #[serde(rename = "vector.retrieve_vectors")] + retrieve_vectors: bool, + + // pagination + #[serde(rename = "pagination.max_limit")] + max_limit: usize, + #[serde(rename = "pagination.max_offset")] + max_offset: usize, + + marker: std::marker::PhantomData, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum DocumentFetchKind { + PerDocumentId { retrieve_vectors: bool }, + Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, +} + +impl DocumentsFetchAggregator { + pub fn from_query(query: &DocumentFetchKind) -> Self { + let (limit, offset, retrieve_vectors) = match query { + DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), + DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => { + (*limit, *offset, *retrieve_vectors) + } + }; + + Self { + per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), + per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. 
} if *with_filter), + max_limit: limit, + max_offset: offset, + retrieve_vectors, + + marker: PhantomData, + } + } +} + +impl Aggregate for DocumentsFetchAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { + per_document_id: self.per_document_id | new.per_document_id, + per_filter: self.per_filter | new.per_filter, + retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors, + max_limit: self.max_limit.max(new.max_limit), + max_offset: self.max_offset.max(new.max_offset), + marker: PhantomData, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + pub async fn get_document( index_scheduler: GuardedData, Data>, document_param: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = document_param.into_inner(); debug!(parameters = ?params, "Get document"); @@ -117,8 +191,15 @@ pub async fn get_document( let features = index_scheduler.features(); let retrieve_vectors = RetrieveVectors::new(param_retrieve_vectors.0, features)?; - analytics.get_fetch_documents( - &DocumentFetchKind::PerDocumentId { retrieve_vectors: param_retrieve_vectors.0 }, + analytics.publish( + DocumentsFetchAggregator:: { + retrieve_vectors: param_retrieve_vectors.0, + per_document_id: true, + per_filter: false, + max_limit: 0, + max_offset: 0, + marker: PhantomData, + }, &req, ); @@ -129,17 +210,52 @@ pub async fn get_document( Ok(HttpResponse::Ok().json(document)) } +#[derive(Serialize)] +pub struct DocumentsDeletionAggregator { + per_document_id: bool, + clear_all: bool, + per_batch: bool, + per_filter: bool, +} + +impl Aggregate for DocumentsDeletionAggregator { + fn event_name(&self) -> &'static str { + "Documents Deleted" + } + + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { + per_document_id: self.per_document_id | new.per_document_id, + clear_all: self.clear_all | new.clear_all, + per_batch: self.per_batch | new.per_batch, + per_filter: self.per_filter | new.per_filter, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + pub async fn delete_document( index_scheduler: GuardedData, Data>, path: web::Path, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let DocumentParam { index_uid, document_id } = path.into_inner(); let index_uid = IndexUid::try_from(index_uid)?; - analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req); + analytics.publish( + DocumentsDeletionAggregator { + per_document_id: true, + clear_all: false, + per_batch: false, + per_filter: false, + }, + &req, + ); let task = KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), @@ -190,17 +306,19 @@ pub async fn documents_by_query_post( index_uid: web::Path, body: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let body = body.into_inner(); debug!(parameters = ?body, "Get documents POST"); - analytics.post_fetch_documents( - &DocumentFetchKind::Normal { - with_filter: body.filter.is_some(), - limit: body.limit, - offset: body.offset, + analytics.publish( + DocumentsFetchAggregator:: { + per_filter: body.filter.is_some(), retrieve_vectors: body.retrieve_vectors, + max_limit: body.limit, + max_offset: body.offset, + per_document_id: false, + marker: PhantomData, }, &req, ); @@ -213,7 
+331,7 @@ pub async fn get_documents( index_uid: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?params, "Get documents GET"); @@ -235,12 +353,14 @@ pub async fn get_documents( filter, }; - analytics.get_fetch_documents( - &DocumentFetchKind::Normal { - with_filter: query.filter.is_some(), - limit: query.limit, - offset: query.offset, + analytics.publish( + DocumentsFetchAggregator:: { + per_filter: query.filter.is_some(), retrieve_vectors: query.retrieve_vectors, + max_limit: query.limit, + max_offset: query.offset, + per_document_id: false, + marker: PhantomData, }, &req, ); @@ -298,6 +418,39 @@ fn from_char_csv_delimiter( } } +aggregate_methods!( + Replaced => "Documents Added", + Updated => "Documents Updated", +); + +#[derive(Serialize)] +pub struct DocumentsAggregator { + payload_types: HashSet, + primary_key: HashSet, + index_creation: bool, + #[serde(skip)] + method: PhantomData, +} + +impl Aggregate for DocumentsAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { + payload_types: self.payload_types.union(&new.payload_types).cloned().collect(), + primary_key: self.primary_key.union(&new.primary_key).cloned().collect(), + index_creation: self.index_creation | new.index_creation, + method: PhantomData, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(self).unwrap_or_default() + } +} + pub async fn replace_documents( index_scheduler: GuardedData, Data>, index_uid: web::Path, @@ -305,16 +458,32 @@ pub async fn replace_documents( body: Payload, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; debug!(parameters = ?params, "Replace documents"); let params = params.into_inner(); - analytics.add_documents( - ¶ms, - index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), + let mut content_types = HashSet::new(); + let content_type = req + .headers() + .get(CONTENT_TYPE) + .and_then(|s| s.to_str().ok()) + .unwrap_or("unknown") + .to_string(); + content_types.insert(content_type); + let mut primary_keys = HashSet::new(); + if let Some(primary_key) = params.primary_key.clone() { + primary_keys.insert(primary_key); + } + analytics.publish( + DocumentsAggregator:: { + payload_types: content_types, + primary_key: primary_keys, + index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), + method: PhantomData, + }, &req, ); @@ -346,16 +515,32 @@ pub async fn update_documents( body: Payload, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let params = params.into_inner(); debug!(parameters = ?params, "Update documents"); - analytics.add_documents( - ¶ms, - index_scheduler.index_exists(&index_uid).map_or(true, |x| !x), + let mut content_types = HashSet::new(); + let content_type = req + .headers() + .get(CONTENT_TYPE) + .and_then(|s| s.to_str().ok()) + .unwrap_or("unknown") + .to_string(); + content_types.insert(content_type); + let mut primary_keys = HashSet::new(); + if let Some(primary_key) = params.primary_key.clone() { + primary_keys.insert(primary_key); + } + analytics.publish( + DocumentsAggregator:: { + payload_types: content_types, + primary_key: primary_keys, + index_creation: index_scheduler.index_exists(&index_uid).map_or(true, |x| 
!x), + method: PhantomData, + }, &req, ); @@ -524,12 +709,20 @@ pub async fn delete_documents_batch( body: web::Json>, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by batch"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; - analytics.delete_documents(DocumentDeletionKind::PerBatch, &req); + analytics.publish( + DocumentsDeletionAggregator { + per_batch: true, + per_document_id: false, + clear_all: false, + per_filter: false, + }, + &req, + ); let ids = body .iter() @@ -562,14 +755,22 @@ pub async fn delete_documents_by_filter( body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Delete documents by filter"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = index_uid.into_inner(); let filter = body.into_inner().filter; - analytics.delete_documents(DocumentDeletionKind::PerFilter, &req); + analytics.publish( + DocumentsDeletionAggregator { + per_filter: true, + per_document_id: false, + clear_all: false, + per_batch: false, + }, + &req, + ); // we ensure the filter is well formed before enqueuing it crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())? @@ -599,13 +800,41 @@ pub struct DocumentEditionByFunction { pub function: String, } +#[derive(Serialize)] +struct EditDocumentsByFunctionAggregator { + // Set to true if at least one request was filtered + filtered: bool, + // Set to true if at least one request contained a context + with_context: bool, + + index_creation: bool, +} + +impl Aggregate for EditDocumentsByFunctionAggregator { + fn event_name(&self) -> &'static str { + "Documents Edited By Function" + } + + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { + filtered: self.filtered | new.filtered, + with_context: self.with_context | new.with_context, + index_creation: self.index_creation | new.index_creation, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + pub async fn edit_documents_by_function( index_scheduler: GuardedData, Data>, index_uid: web::Path, params: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?params, "Edit documents by function"); @@ -617,9 +846,12 @@ pub async fn edit_documents_by_function( let index_uid = index_uid.into_inner(); let params = params.into_inner(); - analytics.update_documents_by_function( - ¶ms, - index_scheduler.index(&index_uid).is_err(), + analytics.publish( + EditDocumentsByFunctionAggregator { + filtered: params.filter.is_some(), + with_context: params.context.is_some(), + index_creation: index_scheduler.index(&index_uid).is_err(), + }, &req, ); @@ -670,10 +902,18 @@ pub async fn clear_all_documents( index_uid: web::Path, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; - analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); + analytics.publish( + DocumentsDeletionAggregator { + clear_all: true, + per_document_id: false, + per_batch: false, + per_filter: false, + }, + &req, + ); let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() }; let uid = get_task_id(&req, &opt)?; diff --git a/meilisearch/src/routes/indexes/facet_search.rs 
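A note on the documents.rs aggregators above, before the facet-search file begins: one struct serves several event names through a zero-sized type parameter, which is what the `PhantomData` marker and the `aggregate_methods!`-generated types are for. The pattern in miniature, with illustrative names:

use std::marker::PhantomData;

trait Method {
    fn event_name() -> &'static str;
}

#[derive(Default)]
struct Get;
#[derive(Default)]
struct Post;

impl Method for Get {
    fn event_name() -> &'static str { "Documents Fetched GET" }
}
impl Method for Post {
    fn event_name() -> &'static str { "Documents Fetched POST" }
}

// One aggregator body, several event names; PhantomData<M> costs zero bytes.
struct FetchAggregator<M: Method> {
    per_filter: bool,
    marker: PhantomData<M>,
}

impl<M: Method> FetchAggregator<M> {
    fn event_name(&self) -> &'static str {
        M::event_name()
    }
}

fn main() {
    let get = FetchAggregator::<Get> { per_filter: false, marker: PhantomData };
    assert_eq!(get.event_name(), "Documents Fetched GET");
    assert!(!get.per_filter);
}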
b/crates/meilisearch/src/routes/indexes/facet_search.rs similarity index 57% rename from meilisearch/src/routes/indexes/facet_search.rs rename to crates/meilisearch/src/routes/indexes/facet_search.rs index 1df80711d..99a4a4f28 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/crates/meilisearch/src/routes/indexes/facet_search.rs @@ -1,3 +1,5 @@ +use std::collections::{BinaryHeap, HashSet}; + use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -10,14 +12,15 @@ use meilisearch_types::locales::Locale; use serde_json::Value; use tracing::debug; -use crate::analytics::{Analytics, FacetSearchAggregator}; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::indexes::search::search_kind; use crate::search::{ - add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, - SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, + add_search_rules, perform_facet_search, FacetSearchResult, HybridQuery, MatchingStrategy, + RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEARCH_OFFSET, }; use crate::search_queue::SearchQueue; @@ -53,20 +56,122 @@ pub struct FacetSearchQuery { pub locales: Option>, } +#[derive(Default)] +pub struct FacetSearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap, + + // The set of all facetNames that were used + facet_names: HashSet, + + // As there been any other parameter than the facetName or facetQuery ones? 
+ additional_search_parameters_provided: bool, +} + +impl FacetSearchAggregator { + #[allow(clippy::field_reassign_with_default)] + pub fn from_query(query: &FacetSearchQuery) -> Self { + let FacetSearchQuery { + facet_query: _, + facet_name, + vector, + q, + filter, + matching_strategy, + attributes_to_search_on, + hybrid, + ranking_score_threshold, + locales, + } = query; + + Self { + total_received: 1, + facet_names: Some(facet_name.clone()).into_iter().collect(), + additional_search_parameters_provided: q.is_some() + || vector.is_some() + || filter.is_some() + || *matching_strategy != MatchingStrategy::default() + || attributes_to_search_on.is_some() + || hybrid.is_some() + || ranking_score_threshold.is_some() + || locales.is_some(), + ..Default::default() + } + } + + pub fn succeed(&mut self, result: &FacetSearchResult) { + let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result; + self.total_succeeded = 1; + self.time_spent.push(*processing_time_ms as usize); + } +} + +impl Aggregate for FacetSearchAggregator { + fn event_name(&self) -> &'static str { + "Facet Searched POST" + } + + fn aggregate(mut self: Box, new: Box) -> Box { + for time in new.time_spent { + self.time_spent.push(time); + } + + Box::new(Self { + total_received: self.total_received.saturating_add(new.total_received), + total_succeeded: self.total_succeeded.saturating_add(new.total_succeeded), + time_spent: self.time_spent, + facet_names: self.facet_names.union(&new.facet_names).cloned().collect(), + additional_search_parameters_provided: self.additional_search_parameters_provided + | new.additional_search_parameters_provided, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + let Self { + total_received, + total_succeeded, + time_spent, + facet_names, + additional_search_parameters_provided, + } = *self; + // the index of the 99th percentage of value + let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) 
+ 1.; + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th as usize); + + serde_json::json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + }, + "facets": { + "total_distinct_facet_count": facet_names.len(), + "additional_search_parameters_provided": additional_search_parameters_provided, + }, + }) + } +} + pub async fn search( index_scheduler: GuardedData, Data>, search_queue: Data, index_uid: web::Path, params: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.into_inner(); debug!(parameters = ?query, "Facet search"); - let mut aggregate = FacetSearchAggregator::from_query(&query, &req); + let mut aggregate = FacetSearchAggregator::from_query(&query); let facet_query = query.facet_query.clone(); let facet_name = query.facet_name.clone(); @@ -100,7 +205,7 @@ pub async fn search( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.post_facet_search(aggregate); + analytics.publish(aggregate, &req); let search_result = search_result?; diff --git a/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs similarity index 87% rename from meilisearch/src/routes/indexes/mod.rs rename to crates/meilisearch/src/routes/indexes/mod.rs index 35b747ccf..7d073ec5f 100644 --- a/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeSet; use std::convert::Infallible; use actix_web::web::Data; @@ -13,12 +14,11 @@ use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::{self, FieldDistribution, Index}; use meilisearch_types::tasks::KindWithContent; use serde::Serialize; -use serde_json::json; use time::OffsetDateTime; use tracing::debug; use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; @@ -28,8 +28,11 @@ use crate::Opt; pub mod documents; pub mod facet_search; pub mod search; +mod search_analytics; pub mod settings; +mod settings_analytics; pub mod similar; +mod similar_analytics; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( @@ -123,12 +126,31 @@ pub struct IndexCreateRequest { primary_key: Option, } +#[derive(Serialize)] +struct IndexCreatedAggregate { + primary_key: BTreeSet, +} + +impl Aggregate for IndexCreatedAggregate { + fn event_name(&self) -> &'static str { + "Index Created" + } + + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + pub async fn create_index( index_scheduler: GuardedData, Data>, body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, 
"Create index"); let IndexCreateRequest { primary_key, uid } = body.into_inner(); @@ -136,9 +158,8 @@ pub async fn create_index( let allow_index_creation = index_scheduler.filters().allow_index_creation(&uid); if allow_index_creation { analytics.publish( - "Index Created".to_string(), - json!({ "primary_key": primary_key }), - Some(&req), + IndexCreatedAggregate { primary_key: primary_key.iter().cloned().collect() }, + &req, ); let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key }; @@ -194,21 +215,38 @@ pub async fn get_index( Ok(HttpResponse::Ok().json(index_view)) } +#[derive(Serialize)] +struct IndexUpdatedAggregate { + primary_key: BTreeSet, +} + +impl Aggregate for IndexUpdatedAggregate { + fn event_name(&self) -> &'static str { + "Index Updated" + } + + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { primary_key: self.primary_key.union(&new.primary_key).cloned().collect() }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} pub async fn update_index( index_scheduler: GuardedData, Data>, index_uid: web::Path, body: AwebJson, req: HttpRequest, opt: web::Data, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?body, "Update index"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; let body = body.into_inner(); analytics.publish( - "Index Updated".to_string(), - json!({ "primary_key": body.primary_key }), - Some(&req), + IndexUpdatedAggregate { primary_key: body.primary_key.iter().cloned().collect() }, + &req, ); let task = KindWithContent::IndexUpdate { diff --git a/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs similarity index 97% rename from meilisearch/src/routes/indexes/search.rs rename to crates/meilisearch/src/routes/indexes/search.rs index 6a8eee521..2f5cb4a36 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -13,12 +13,13 @@ use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; use tracing::debug; -use crate::analytics::{Analytics, SearchAggregator}; +use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; +use crate::routes::indexes::search_analytics::{SearchAggregator, SearchGET, SearchPOST}; use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, RetrieveVectors, SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, @@ -225,7 +226,7 @@ pub async fn search_with_url_query( index_uid: web::Path, params: AwebQueryParameter, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -237,7 +238,7 @@ pub async fn search_with_url_query( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query); let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); @@ -254,7 +255,7 @@ pub async fn search_with_url_query( if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } - analytics.get_search(aggregate); + 
analytics.publish(aggregate, &req); let search_result = search_result?; @@ -268,7 +269,7 @@ pub async fn search_with_post( index_uid: web::Path, params: AwebJson, req: HttpRequest, - analytics: web::Data, + analytics: web::Data, ) -> Result { let index_uid = IndexUid::try_from(index_uid.into_inner())?; @@ -280,7 +281,7 @@ pub async fn search_with_post( add_search_rules(&mut query.filter, search_rules); } - let mut aggregate = SearchAggregator::from_query(&query, &req); + let mut aggregate = SearchAggregator::::from_query(&query); let index = index_scheduler.index(&index_uid)?; @@ -302,7 +303,7 @@ pub async fn search_with_post( MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); } } - analytics.post_search(aggregate); + analytics.publish(aggregate, &req); let search_result = search_result?; diff --git a/crates/meilisearch/src/routes/indexes/search_analytics.rs b/crates/meilisearch/src/routes/indexes/search_analytics.rs new file mode 100644 index 000000000..b16e2636e --- /dev/null +++ b/crates/meilisearch/src/routes/indexes/search_analytics.rs @@ -0,0 +1,483 @@ +use std::collections::{BTreeSet, BinaryHeap, HashMap}; + +use meilisearch_types::locales::Locale; +use once_cell::sync::Lazy; +use regex::Regex; +use serde_json::{json, Value}; + +use crate::aggregate_methods; +use crate::analytics::{Aggregate, AggregateMethod}; +use crate::search::{ + SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEMANTIC_RATIO, +}; + +aggregate_methods!( + SearchGET => "Documents Searched GET", + SearchPOST => "Documents Searched POST", +); + +#[derive(Default)] +pub struct SearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + total_degraded: usize, + total_used_negative_operator: usize, + time_spent: BinaryHeap, + + // sort + sort_with_geo_point: bool, + // every time a request has a filter, this field must be incremented by the number of terms it contains + sort_sum_of_criteria_terms: usize, + // every time a request has a filter, this field must be incremented by one + sort_total_number_of_criteria: usize, + + // distinct + distinct: bool, + + // filter + filter_with_geo_radius: bool, + filter_with_geo_bounding_box: bool, + // every time a request has a filter, this field must be incremented by the number of terms it contains + filter_sum_of_criteria_terms: usize, + // every time a request has a filter, this field must be incremented by one + filter_total_number_of_criteria: usize, + used_syntax: HashMap, + + // attributes_to_search_on + // every time a search is done using attributes_to_search_on + attributes_to_search_on_total_number_of_uses: usize, + + // q + // The maximum number of terms in a q request + max_terms_number: usize, + + // vector + // The maximum number of floats in a vector request + max_vector_size: usize, + // Whether the semantic ratio passed to a hybrid search equals the default ratio. 
+ semantic_ratio: bool, + hybrid: bool, + retrieve_vectors: bool, + + // every time a search is done, we increment the counter linked to the used settings + matching_strategy: HashMap, + + // List of the unique Locales passed as parameter + locales: BTreeSet, + + // pagination + max_limit: usize, + max_offset: usize, + finite_pagination: usize, + + // formatting + max_attributes_to_retrieve: usize, + max_attributes_to_highlight: usize, + highlight_pre_tag: bool, + highlight_post_tag: bool, + max_attributes_to_crop: usize, + crop_marker: bool, + show_matches_position: bool, + crop_length: bool, + + // facets + facets_sum_of_terms: usize, + facets_total_number_of_facets: usize, + + // scoring + show_ranking_score: bool, + show_ranking_score_details: bool, + ranking_score_threshold: bool, + + marker: std::marker::PhantomData, +} + +impl SearchAggregator { + #[allow(clippy::field_reassign_with_default)] + pub fn from_query(query: &SearchQuery) -> Self { + let SearchQuery { + q, + vector, + offset, + limit, + page, + hits_per_page, + attributes_to_retrieve: _, + retrieve_vectors, + attributes_to_crop: _, + crop_length, + attributes_to_highlight: _, + show_matches_position, + show_ranking_score, + show_ranking_score_details, + filter, + sort, + distinct, + facets: _, + highlight_pre_tag, + highlight_post_tag, + crop_marker, + matching_strategy, + attributes_to_search_on, + hybrid, + ranking_score_threshold, + locales, + } = query; + + let mut ret = Self::default(); + + ret.total_received = 1; + + if let Some(ref sort) = sort { + ret.sort_total_number_of_criteria = 1; + ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint(")); + ret.sort_sum_of_criteria_terms = sort.len(); + } + + ret.distinct = distinct.is_some(); + + if let Some(ref filter) = filter { + static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); + ret.filter_total_number_of_criteria = 1; + + let syntax = match filter { + Value::String(_) => "string".to_string(), + Value::Array(values) => { + if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { + "mixed".to_string() + } else { + "array".to_string() + } + } + _ => "none".to_string(), + }; + // convert the string to a HashMap + ret.used_syntax.insert(syntax, 1); + + let stringified_filters = filter.to_string(); + ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); + ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); + ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); + } + + // attributes_to_search_on + if attributes_to_search_on.is_some() { + ret.attributes_to_search_on_total_number_of_uses = 1; + } + + if let Some(ref q) = q { + ret.max_terms_number = q.split_whitespace().count(); + } + + if let Some(ref vector) = vector { + ret.max_vector_size = vector.len(); + } + ret.retrieve_vectors |= retrieve_vectors; + + if query.is_finite_pagination() { + let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); + ret.max_limit = limit; + ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit; + ret.finite_pagination = 1; + } else { + ret.max_limit = *limit; + ret.max_offset = *offset; + ret.finite_pagination = 0; + } + + ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); + + if let Some(locales) = locales { + ret.locales = locales.iter().copied().collect(); + } + + ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); + ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); + ret.crop_marker = 
*crop_marker != DEFAULT_CROP_MARKER(); + ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH(); + ret.show_matches_position = *show_matches_position; + + ret.show_ranking_score = *show_ranking_score; + ret.show_ranking_score_details = *show_ranking_score_details; + ret.ranking_score_threshold = ranking_score_threshold.is_some(); + + if let Some(hybrid) = hybrid { + ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); + ret.hybrid = true; + } + + ret + } + + pub fn succeed(&mut self, result: &SearchResult) { + let SearchResult { + hits: _, + query: _, + processing_time_ms, + hits_info: _, + semantic_hit_count: _, + facet_distribution: _, + facet_stats: _, + degraded, + used_negative_operator, + } = result; + + self.total_succeeded = self.total_succeeded.saturating_add(1); + if *degraded { + self.total_degraded = self.total_degraded.saturating_add(1); + } + if *used_negative_operator { + self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1); + } + self.time_spent.push(*processing_time_ms as usize); + } +} + +impl Aggregate for SearchAggregator { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(mut self: Box, new: Box) -> Box { + let Self { + total_received, + total_succeeded, + mut time_spent, + sort_with_geo_point, + sort_sum_of_criteria_terms, + sort_total_number_of_criteria, + distinct, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + attributes_to_search_on_total_number_of_uses, + max_terms_number, + max_vector_size, + retrieve_vectors, + matching_strategy, + max_limit, + max_offset, + finite_pagination, + max_attributes_to_retrieve, + max_attributes_to_highlight, + highlight_pre_tag, + highlight_post_tag, + max_attributes_to_crop, + crop_marker, + show_matches_position, + crop_length, + facets_sum_of_terms, + facets_total_number_of_facets, + show_ranking_score, + show_ranking_score_details, + semantic_ratio, + hybrid, + total_degraded, + total_used_negative_operator, + ranking_score_threshold, + mut locales, + marker: _, + } = *new; + + // request + self.total_received = self.total_received.saturating_add(total_received); + self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); + self.total_degraded = self.total_degraded.saturating_add(total_degraded); + self.total_used_negative_operator = + self.total_used_negative_operator.saturating_add(total_used_negative_operator); + self.time_spent.append(&mut time_spent); + + // sort + self.sort_with_geo_point |= sort_with_geo_point; + self.sort_sum_of_criteria_terms = + self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms); + self.sort_total_number_of_criteria = + self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); + + // distinct + self.distinct |= distinct; + + // filter + self.filter_with_geo_radius |= filter_with_geo_radius; + self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; + self.filter_sum_of_criteria_terms = + self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); + self.filter_total_number_of_criteria = + self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); + for (key, value) in used_syntax.into_iter() { + let used_syntax = self.used_syntax.entry(key).or_insert(0); + *used_syntax = used_syntax.saturating_add(value); + } + + // attributes_to_search_on + self.attributes_to_search_on_total_number_of_uses = self + 
.attributes_to_search_on_total_number_of_uses + .saturating_add(attributes_to_search_on_total_number_of_uses); + + // q + self.max_terms_number = self.max_terms_number.max(max_terms_number); + + // vector + self.max_vector_size = self.max_vector_size.max(max_vector_size); + self.retrieve_vectors |= retrieve_vectors; + self.semantic_ratio |= semantic_ratio; + self.hybrid |= hybrid; + + // pagination + self.max_limit = self.max_limit.max(max_limit); + self.max_offset = self.max_offset.max(max_offset); + self.finite_pagination += finite_pagination; + + // formatting + self.max_attributes_to_retrieve = + self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); + self.max_attributes_to_highlight = + self.max_attributes_to_highlight.max(max_attributes_to_highlight); + self.highlight_pre_tag |= highlight_pre_tag; + self.highlight_post_tag |= highlight_post_tag; + self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop); + self.crop_marker |= crop_marker; + self.show_matches_position |= show_matches_position; + self.crop_length |= crop_length; + + // facets + self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms); + self.facets_total_number_of_facets = + self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets); + + // matching strategy + for (key, value) in matching_strategy.into_iter() { + let matching_strategy = self.matching_strategy.entry(key).or_insert(0); + *matching_strategy = matching_strategy.saturating_add(value); + } + + // scoring + self.show_ranking_score |= show_ranking_score; + self.show_ranking_score_details |= show_ranking_score_details; + self.ranking_score_threshold |= ranking_score_threshold; + + // locales + self.locales.append(&mut locales); + + self + } + + fn into_event(self: Box) -> serde_json::Value { + let Self { + total_received, + total_succeeded, + time_spent, + sort_with_geo_point, + sort_sum_of_criteria_terms, + sort_total_number_of_criteria, + distinct, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + attributes_to_search_on_total_number_of_uses, + max_terms_number, + max_vector_size, + retrieve_vectors, + matching_strategy, + max_limit, + max_offset, + finite_pagination, + max_attributes_to_retrieve, + max_attributes_to_highlight, + highlight_pre_tag, + highlight_post_tag, + max_attributes_to_crop, + crop_marker, + show_matches_position, + crop_length, + facets_sum_of_terms, + facets_total_number_of_facets, + show_ranking_score, + show_ranking_score_details, + semantic_ratio, + hybrid, + total_degraded, + total_used_negative_operator, + ranking_score_threshold, + locales, + marker: _, + } = *self; + + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentage of value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested by the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); + + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics + "total_received": total_received, + "total_degraded": total_degraded, + "total_used_negative_operator": total_used_negative_operator, + }, + "sort": { + "with_geoPoint": sort_with_geo_point, + "avg_criteria_number": format!("{:.2}", 
sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), + }, + "distinct": distinct, + "filter": { + "with_geoRadius": filter_with_geo_radius, + "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "attributes_to_search_on": { + "total_number_of_uses": attributes_to_search_on_total_number_of_uses, + }, + "q": { + "max_terms_number": max_terms_number, + }, + "vector": { + "max_vector_size": max_vector_size, + "retrieve_vectors": retrieve_vectors, + }, + "hybrid": { + "enabled": hybrid, + "semantic_ratio": semantic_ratio, + }, + "pagination": { + "max_limit": max_limit, + "max_offset": max_offset, + "most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, + }, + "formatting": { + "max_attributes_to_retrieve": max_attributes_to_retrieve, + "max_attributes_to_highlight": max_attributes_to_highlight, + "highlight_pre_tag": highlight_pre_tag, + "highlight_post_tag": highlight_post_tag, + "max_attributes_to_crop": max_attributes_to_crop, + "crop_marker": crop_marker, + "show_matches_position": show_matches_position, + "crop_length": crop_length, + }, + "facets": { + "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), + }, + "matching_strategy": { + "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "locales": locales, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + "ranking_score_threshold": ranking_score_threshold, + }, + }) + } +} diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs new file mode 100644 index 000000000..a9d8d3053 --- /dev/null +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -0,0 +1,532 @@ +use actix_web::web::Data; +use actix_web::{web, HttpRequest, HttpResponse}; +use deserr::actix_web::AwebJson; +use index_scheduler::IndexScheduler; +use meilisearch_types::deserr::DeserrJsonError; +use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::update::Setting; +use meilisearch_types::settings::{settings, SecretPolicy, Settings, Unchecked}; +use meilisearch_types::tasks::KindWithContent; +use tracing::debug; + +use super::settings_analytics::*; +use crate::analytics::Analytics; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; +use crate::Opt; + +#[macro_export] +macro_rules! 
make_setting_route { + ($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident) => { + pub mod $attr { + use actix_web::web::Data; + use actix_web::{web, HttpRequest, HttpResponse, Resource}; + use index_scheduler::IndexScheduler; + use meilisearch_types::error::ResponseError; + use meilisearch_types::index_uid::IndexUid; + use meilisearch_types::milli::update::Setting; + use meilisearch_types::settings::{settings, Settings}; + use meilisearch_types::tasks::KindWithContent; + use tracing::debug; + use $crate::analytics::Analytics; + use $crate::extractors::authentication::policies::*; + use $crate::extractors::authentication::GuardedData; + use $crate::extractors::sequential_extractor::SeqHandler; + use $crate::Opt; + use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView}; + + pub async fn delete( + index_scheduler: GuardedData< + ActionPolicy<{ actions::SETTINGS_UPDATE }>, + Data, + >, + index_uid: web::Path, + req: HttpRequest, + opt: web::Data, + ) -> Result { + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + + let new_settings = Settings { $attr: Setting::Reset.into(), ..Default::default() }; + + let allow_index_creation = + index_scheduler.filters().allow_index_creation(&index_uid); + + let task = KindWithContent::SettingsUpdate { + index_uid: index_uid.to_string(), + new_settings: Box::new(new_settings), + is_deletion: true, + allow_index_creation, + }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); + + debug!(returns = ?task, "Delete settings"); + Ok(HttpResponse::Accepted().json(task)) + } + + pub async fn update( + index_scheduler: GuardedData< + ActionPolicy<{ actions::SETTINGS_UPDATE }>, + Data, + >, + index_uid: actix_web::web::Path, + body: deserr::actix_web::AwebJson, $err_ty>, + req: HttpRequest, + opt: web::Data, + analytics: web::Data, + ) -> std::result::Result { + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + + let body = body.into_inner(); + debug!(parameters = ?body, "Update settings"); + + #[allow(clippy::redundant_closure_call)] + analytics.publish( + $crate::routes::indexes::settings_analytics::$analytics::new(body.as_ref()).into_settings(), + &req, + ); + + let new_settings = Settings { + $attr: match body { + Some(inner_body) => Setting::Set(inner_body).into(), + None => Setting::Reset.into(), + }, + ..Default::default() + }; + + let new_settings = $crate::routes::indexes::settings::validate_settings( + new_settings, + &index_scheduler, + )?; + + let allow_index_creation = + index_scheduler.filters().allow_index_creation(&index_uid); + + let task = KindWithContent::SettingsUpdate { + index_uid: index_uid.to_string(), + new_settings: Box::new(new_settings), + is_deletion: false, + allow_index_creation, + }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); + + debug!(returns = ?task, "Update settings"); + Ok(HttpResponse::Accepted().json(task)) + } + + pub async fn get( + index_scheduler: GuardedData< + ActionPolicy<{ actions::SETTINGS_GET }>, + Data, + >, + index_uid: actix_web::web::Path, + ) -> std::result::Result { + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.read_txn()?; + let settings = settings(&index, &rtxn, meilisearch_types::settings::SecretPolicy::HideSecrets)?; + + debug!(returns = ?settings, "Update settings"); + + Ok(HttpResponse::Ok().json(settings.$attr)) + } + + pub fn resources() -> Resource { + Resource::new($route) + .route(web::get().to(SeqHandler(get))) + .route(web::$update_verb().to(SeqHandler(update))) + .route(web::delete().to(SeqHandler(delete))) + } + } + }; +} + +make_setting_route!( + "/filterable-attributes", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes, + >, + filterable_attributes, + "filterableAttributes", + FilterableAttributesAnalytics +); + +make_setting_route!( + "/sortable-attributes", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes, + >, + sortable_attributes, + "sortableAttributes", + SortableAttributesAnalytics +); + +make_setting_route!( + "/displayed-attributes", + put, + Vec, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes, + >, + displayed_attributes, + "displayedAttributes", + DisplayedAttributesAnalytics +); + +make_setting_route!( + "/typo-tolerance", + patch, + meilisearch_types::settings::TypoSettings, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance, + >, + typo_tolerance, + "typoTolerance", + TypoToleranceAnalytics +); + +make_setting_route!( + "/searchable-attributes", + put, + Vec, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes, + >, + searchable_attributes, + "searchableAttributes", + SearchableAttributesAnalytics +); + +make_setting_route!( + "/stop-words", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsStopWords, + >, + stop_words, + "stopWords", + StopWordsAnalytics +); + +make_setting_route!( + "/non-separator-tokens", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens, + >, + non_separator_tokens, + "nonSeparatorTokens", + NonSeparatorTokensAnalytics +); + +make_setting_route!( + "/separator-tokens", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens, + >, + separator_tokens, + "separatorTokens", + SeparatorTokensAnalytics +); + +make_setting_route!( + "/dictionary", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsDictionary, + >, + dictionary, + "dictionary", + DictionaryAnalytics +); + +make_setting_route!( + "/synonyms", + put, + std::collections::BTreeMap>, + meilisearch_types::deserr::DeserrJsonError< + 
meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms, + >, + synonyms, + "synonyms", + SynonymsAnalytics +); + +make_setting_route!( + "/distinct-attribute", + put, + String, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute, + >, + distinct_attribute, + "distinctAttribute", + DistinctAttributeAnalytics +); + +make_setting_route!( + "/proximity-precision", + put, + meilisearch_types::settings::ProximityPrecisionView, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision, + >, + proximity_precision, + "proximityPrecision", + ProximityPrecisionAnalytics +); + +make_setting_route!( + "/localized-attributes", + put, + Vec, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes, + >, + localized_attributes, + "localizedAttributes", + LocalesAnalytics +); + +make_setting_route!( + "/ranking-rules", + put, + Vec, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules, + >, + ranking_rules, + "rankingRules", + RankingRulesAnalytics +); + +make_setting_route!( + "/faceting", + patch, + meilisearch_types::settings::FacetingSettings, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsFaceting, + >, + faceting, + "faceting", + FacetingAnalytics +); + +make_setting_route!( + "/pagination", + patch, + meilisearch_types::settings::PaginationSettings, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsPagination, + >, + pagination, + "pagination", + PaginationAnalytics +); + +make_setting_route!( + "/embedders", + patch, + std::collections::BTreeMap>, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders, + >, + embedders, + "embedders", + EmbeddersAnalytics +); + +make_setting_route!( + "/search-cutoff-ms", + put, + u64, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs, + >, + search_cutoff_ms, + "searchCutoffMs", + SearchCutoffMsAnalytics +); + +macro_rules! 
generate_configure { + ($($mod:ident),*) => { + pub fn configure(cfg: &mut web::ServiceConfig) { + use crate::extractors::sequential_extractor::SeqHandler; + cfg.service( + web::resource("") + .route(web::patch().to(SeqHandler(update_all))) + .route(web::get().to(SeqHandler(get_all))) + .route(web::delete().to(SeqHandler(delete_all)))) + $(.service($mod::resources()))*; + } + }; +} + +generate_configure!( + filterable_attributes, + sortable_attributes, + displayed_attributes, + localized_attributes, + searchable_attributes, + distinct_attribute, + proximity_precision, + stop_words, + separator_tokens, + non_separator_tokens, + dictionary, + synonyms, + ranking_rules, + typo_tolerance, + pagination, + faceting, + embedders, + search_cutoff_ms +); + +pub async fn update_all( + index_scheduler: GuardedData, Data>, + index_uid: web::Path, + body: AwebJson, DeserrJsonError>, + req: HttpRequest, + opt: web::Data, + analytics: web::Data, +) -> Result { + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + + let new_settings = body.into_inner(); + debug!(parameters = ?new_settings, "Update all settings"); + let new_settings = validate_settings(new_settings, &index_scheduler)?; + + analytics.publish( + SettingsAnalytics { + ranking_rules: RankingRulesAnalytics::new(new_settings.ranking_rules.as_ref().set()), + searchable_attributes: SearchableAttributesAnalytics::new( + new_settings.searchable_attributes.as_ref().set(), + ), + displayed_attributes: DisplayedAttributesAnalytics::new( + new_settings.displayed_attributes.as_ref().set(), + ), + sortable_attributes: SortableAttributesAnalytics::new( + new_settings.sortable_attributes.as_ref().set(), + ), + filterable_attributes: FilterableAttributesAnalytics::new( + new_settings.filterable_attributes.as_ref().set(), + ), + distinct_attribute: DistinctAttributeAnalytics::new( + new_settings.distinct_attribute.as_ref().set(), + ), + proximity_precision: ProximityPrecisionAnalytics::new( + new_settings.proximity_precision.as_ref().set(), + ), + typo_tolerance: TypoToleranceAnalytics::new(new_settings.typo_tolerance.as_ref().set()), + faceting: FacetingAnalytics::new(new_settings.faceting.as_ref().set()), + pagination: PaginationAnalytics::new(new_settings.pagination.as_ref().set()), + stop_words: StopWordsAnalytics::new(new_settings.stop_words.as_ref().set()), + synonyms: SynonymsAnalytics::new(new_settings.synonyms.as_ref().set()), + embedders: EmbeddersAnalytics::new(new_settings.embedders.as_ref().set()), + search_cutoff_ms: SearchCutoffMsAnalytics::new( + new_settings.search_cutoff_ms.as_ref().set(), + ), + locales: LocalesAnalytics::new(new_settings.localized_attributes.as_ref().set()), + dictionary: DictionaryAnalytics::new(new_settings.dictionary.as_ref().set()), + separator_tokens: SeparatorTokensAnalytics::new( + new_settings.separator_tokens.as_ref().set(), + ), + non_separator_tokens: NonSeparatorTokensAnalytics::new( + new_settings.non_separator_tokens.as_ref().set(), + ), + }, + &req, + ); + + let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), + is_deletion: false, + allow_index_creation, + }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? 
+ .into(); + + debug!(returns = ?task, "Update all settings"); + Ok(HttpResponse::Accepted().json(task)) +} + +pub async fn get_all( + index_scheduler: GuardedData, Data>, + index_uid: web::Path, +) -> Result { + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.read_txn()?; + let new_settings = settings(&index, &rtxn, SecretPolicy::HideSecrets)?; + debug!(returns = ?new_settings, "Get all settings"); + Ok(HttpResponse::Ok().json(new_settings)) +} + +pub async fn delete_all( + index_scheduler: GuardedData, Data>, + index_uid: web::Path, + req: HttpRequest, + opt: web::Data, +) -> Result { + let index_uid = IndexUid::try_from(index_uid.into_inner())?; + + let new_settings = Settings::cleared().into_unchecked(); + + let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), + is_deletion: true, + allow_index_creation, + }; + let uid = get_task_id(&req, &opt)?; + let dry_run = is_dry_run(&req, &opt)?; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) + .await?? + .into(); + + debug!(returns = ?task, "Delete all settings"); + Ok(HttpResponse::Accepted().json(task)) +} + +fn validate_settings( + settings: Settings, + index_scheduler: &IndexScheduler, +) -> Result, ResponseError> { + if matches!(settings.embedders, Setting::Set(_)) { + index_scheduler.features().check_vector("Passing `embedders` in settings")? + } + Ok(settings.validate()?) +} diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs new file mode 100644 index 000000000..32bddcbdd --- /dev/null +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -0,0 +1,622 @@ +//! All the structures used to make the analytics on the settings works. +//! The signatures of the `new` functions are not very rust idiomatic because they must match the types received +//! through the sub-settings route directly without any manipulation. +//! This is why we often use a `Option<&Vec<_>>` instead of a `Option<&[_]>`. 
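The `settings_analytics` module that begins here gives every sub-setting its own small analytics struct, each of which folds into the umbrella `SettingsAnalytics` via `into_settings()`; a route that updates a single setting can therefore publish the same "Settings Updated" event as the `update_all` handler above, with every untouched field left at its default. A minimal, self-contained sketch of that pattern — the types and field names below are simplified stand-ins, not the real definitions:

    // Umbrella event: one field per sub-setting, all defaulting to
    // "not touched by this request".
    #[derive(Default, Debug)]
    struct SettingsAnalytics {
        stop_words: StopWordsAnalytics,
        pagination: PaginationAnalytics,
    }

    #[derive(Default, Debug)]
    struct StopWordsAnalytics {
        total: Option<usize>,
    }

    impl StopWordsAnalytics {
        fn new(stop_words: Option<&std::collections::BTreeSet<String>>) -> Self {
            Self { total: stop_words.map(|s| s.len()) }
        }

        // Fold this sub-setting into the umbrella event; `..Default::default()`
        // leaves every other sub-setting untouched.
        fn into_settings(self) -> SettingsAnalytics {
            SettingsAnalytics { stop_words: self, ..Default::default() }
        }
    }

    #[derive(Default, Debug)]
    struct PaginationAnalytics {
        max_total_hits: Option<usize>,
    }

    fn main() {
        let words: std::collections::BTreeSet<String> =
            ["the", "a"].iter().map(|s| s.to_string()).collect();
        let event = StopWordsAnalytics::new(Some(&words)).into_settings();
        // stop_words.total == Some(2); pagination stays at its default.
        println!("{event:?}");
    }

The `..Default::default()` spread is what keeps the per-route handlers cheap: each one fills in exactly one field and inherits `None`/`false` everywhere else.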
+ +use std::collections::{BTreeMap, BTreeSet, HashSet}; + +use meilisearch_types::facet_values_sort::FacetValuesSort; +use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; +use meilisearch_types::milli::update::Setting; +use meilisearch_types::milli::vector::settings::EmbeddingSettings; +use meilisearch_types::settings::{ + FacetingSettings, PaginationSettings, ProximityPrecisionView, RankingRuleView, TypoSettings, +}; +use serde::Serialize; + +use crate::analytics::Aggregate; + +#[derive(Serialize, Default)] +pub struct SettingsAnalytics { + pub ranking_rules: RankingRulesAnalytics, + pub searchable_attributes: SearchableAttributesAnalytics, + pub displayed_attributes: DisplayedAttributesAnalytics, + pub sortable_attributes: SortableAttributesAnalytics, + pub filterable_attributes: FilterableAttributesAnalytics, + pub distinct_attribute: DistinctAttributeAnalytics, + pub proximity_precision: ProximityPrecisionAnalytics, + pub typo_tolerance: TypoToleranceAnalytics, + pub faceting: FacetingAnalytics, + pub pagination: PaginationAnalytics, + pub stop_words: StopWordsAnalytics, + pub synonyms: SynonymsAnalytics, + pub embedders: EmbeddersAnalytics, + pub search_cutoff_ms: SearchCutoffMsAnalytics, + pub locales: LocalesAnalytics, + pub dictionary: DictionaryAnalytics, + pub separator_tokens: SeparatorTokensAnalytics, + pub non_separator_tokens: NonSeparatorTokensAnalytics, +} + +impl Aggregate for SettingsAnalytics { + fn event_name(&self) -> &'static str { + "Settings Updated" + } + + fn aggregate(self: Box, new: Box) -> Box { + Box::new(Self { + ranking_rules: RankingRulesAnalytics { + words_position: new + .ranking_rules + .words_position + .or(self.ranking_rules.words_position), + typo_position: new.ranking_rules.typo_position.or(self.ranking_rules.typo_position), + proximity_position: new + .ranking_rules + .proximity_position + .or(self.ranking_rules.proximity_position), + attribute_position: new + .ranking_rules + .attribute_position + .or(self.ranking_rules.attribute_position), + sort_position: new.ranking_rules.sort_position.or(self.ranking_rules.sort_position), + exactness_position: new + .ranking_rules + .exactness_position + .or(self.ranking_rules.exactness_position), + values: new.ranking_rules.values.or(self.ranking_rules.values), + }, + searchable_attributes: SearchableAttributesAnalytics { + total: new.searchable_attributes.total.or(self.searchable_attributes.total), + with_wildcard: new + .searchable_attributes + .with_wildcard + .or(self.searchable_attributes.with_wildcard), + }, + displayed_attributes: DisplayedAttributesAnalytics { + total: new.displayed_attributes.total.or(self.displayed_attributes.total), + with_wildcard: new + .displayed_attributes + .with_wildcard + .or(self.displayed_attributes.with_wildcard), + }, + sortable_attributes: SortableAttributesAnalytics { + total: new.sortable_attributes.total.or(self.sortable_attributes.total), + has_geo: new.sortable_attributes.has_geo.or(self.sortable_attributes.has_geo), + }, + filterable_attributes: FilterableAttributesAnalytics { + total: new.filterable_attributes.total.or(self.filterable_attributes.total), + has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo), + }, + distinct_attribute: DistinctAttributeAnalytics { + set: self.distinct_attribute.set | new.distinct_attribute.set, + }, + proximity_precision: ProximityPrecisionAnalytics { + set: self.proximity_precision.set | new.proximity_precision.set, + value: 
new.proximity_precision.value.or(self.proximity_precision.value), + }, + typo_tolerance: TypoToleranceAnalytics { + enabled: new.typo_tolerance.enabled.or(self.typo_tolerance.enabled), + disable_on_attributes: new + .typo_tolerance + .disable_on_attributes + .or(self.typo_tolerance.disable_on_attributes), + disable_on_words: new + .typo_tolerance + .disable_on_words + .or(self.typo_tolerance.disable_on_words), + min_word_size_for_one_typo: new + .typo_tolerance + .min_word_size_for_one_typo + .or(self.typo_tolerance.min_word_size_for_one_typo), + min_word_size_for_two_typos: new + .typo_tolerance + .min_word_size_for_two_typos + .or(self.typo_tolerance.min_word_size_for_two_typos), + }, + faceting: FacetingAnalytics { + max_values_per_facet: new + .faceting + .max_values_per_facet + .or(self.faceting.max_values_per_facet), + sort_facet_values_by_star_count: new + .faceting + .sort_facet_values_by_star_count + .or(self.faceting.sort_facet_values_by_star_count), + sort_facet_values_by_total: new + .faceting + .sort_facet_values_by_total + .or(self.faceting.sort_facet_values_by_total), + }, + pagination: PaginationAnalytics { + max_total_hits: new.pagination.max_total_hits.or(self.pagination.max_total_hits), + }, + stop_words: StopWordsAnalytics { + total: new.stop_words.total.or(self.stop_words.total), + }, + synonyms: SynonymsAnalytics { total: new.synonyms.total.or(self.synonyms.total) }, + embedders: EmbeddersAnalytics { + total: new.embedders.total.or(self.embedders.total), + sources: match (self.embedders.sources, new.embedders.sources) { + (None, None) => None, + (Some(sources), None) | (None, Some(sources)) => Some(sources), + (Some(this), Some(other)) => Some(this.union(&other).cloned().collect()), + }, + document_template_used: match ( + self.embedders.document_template_used, + new.embedders.document_template_used, + ) { + (None, None) => None, + (Some(used), None) | (None, Some(used)) => Some(used), + (Some(this), Some(other)) => Some(this | other), + }, + document_template_max_bytes: match ( + self.embedders.document_template_max_bytes, + new.embedders.document_template_max_bytes, + ) { + (None, None) => None, + (Some(bytes), None) | (None, Some(bytes)) => Some(bytes), + (Some(this), Some(other)) => Some(this.max(other)), + }, + binary_quantization_used: match ( + self.embedders.binary_quantization_used, + new.embedders.binary_quantization_used, + ) { + (None, None) => None, + (Some(bq), None) | (None, Some(bq)) => Some(bq), + (Some(this), Some(other)) => Some(this | other), + }, + }, + search_cutoff_ms: SearchCutoffMsAnalytics { + search_cutoff_ms: new + .search_cutoff_ms + .search_cutoff_ms + .or(self.search_cutoff_ms.search_cutoff_ms), + }, + locales: LocalesAnalytics { locales: new.locales.locales.or(self.locales.locales) }, + dictionary: DictionaryAnalytics { + total: new.dictionary.total.or(self.dictionary.total), + }, + separator_tokens: SeparatorTokensAnalytics { + total: new.non_separator_tokens.total.or(self.separator_tokens.total), + }, + non_separator_tokens: NonSeparatorTokensAnalytics { + total: new.non_separator_tokens.total.or(self.non_separator_tokens.total), + }, + }) + } + + fn into_event(self: Box) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + +#[derive(Serialize, Default)] +pub struct RankingRulesAnalytics { + pub words_position: Option, + pub typo_position: Option, + pub proximity_position: Option, + pub attribute_position: Option, + pub sort_position: Option, + pub exactness_position: Option, + pub values: Option, +} + 
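Two details of the `aggregate` implementation above are worth calling out. First, almost every scalar field is merged with `new.field.or(self.field)`: the most recent non-`None` observation wins, and the previous value is kept as a fallback. Second, the `separator_tokens` arm reads from `new.non_separator_tokens.total`, which looks like a copy-paste slip worth double-checking before merge. A toy illustration of the `Option::or` merge semantics — the struct and field names here are invented for the example:

    #[derive(Debug, PartialEq)]
    struct Partial {
        total: Option<usize>,
        has_geo: Option<bool>,
    }

    fn merge(old: Partial, new: Partial) -> Partial {
        Partial {
            // Keep the most recent observation; otherwise whatever we had before.
            total: new.total.or(old.total),
            has_geo: new.has_geo.or(old.has_geo),
        }
    }

    fn main() {
        let old = Partial { total: Some(3), has_geo: None };
        let new = Partial { total: None, has_geo: Some(true) };
        // Each field independently takes the newest non-None value.
        assert_eq!(
            merge(old, new),
            Partial { total: Some(3), has_geo: Some(true) }
        );
    }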
+impl RankingRulesAnalytics { + pub fn new(rr: Option<&Vec>) -> Self { + RankingRulesAnalytics { + words_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Words)) + }), + typo_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo)) + }), + proximity_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Proximity) + }) + }), + attribute_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Attribute) + }) + }), + sort_position: rr.as_ref().and_then(|rr| { + rr.iter() + .position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort)) + }), + exactness_position: rr.as_ref().and_then(|rr| { + rr.iter().position(|s| { + matches!(s, meilisearch_types::settings::RankingRuleView::Exactness) + }) + }), + values: rr.as_ref().map(|rr| { + rr.iter() + .filter(|s| { + matches!( + s, + meilisearch_types::settings::RankingRuleView::Asc(_) + | meilisearch_types::settings::RankingRuleView::Desc(_) + ) + }) + .map(|x| x.to_string()) + .collect::>() + .join(", ") + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { ranking_rules: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SearchableAttributesAnalytics { + pub total: Option, + pub with_wildcard: Option, +} + +impl SearchableAttributesAnalytics { + pub fn new(setting: Option<&Vec>) -> Self { + Self { + total: setting.as_ref().map(|searchable| searchable.len()), + with_wildcard: setting + .as_ref() + .map(|searchable| searchable.iter().any(|searchable| searchable == "*")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { searchable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DisplayedAttributesAnalytics { + pub total: Option, + pub with_wildcard: Option, +} + +impl DisplayedAttributesAnalytics { + pub fn new(displayed: Option<&Vec>) -> Self { + Self { + total: displayed.as_ref().map(|displayed| displayed.len()), + with_wildcard: displayed + .as_ref() + .map(|displayed| displayed.iter().any(|displayed| displayed == "*")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { displayed_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SortableAttributesAnalytics { + pub total: Option, + pub has_geo: Option, +} + +impl SortableAttributesAnalytics { + pub fn new(setting: Option<&BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|sort| sort.len()), + has_geo: setting.as_ref().map(|sort| sort.contains("_geo")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { sortable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct FilterableAttributesAnalytics { + pub total: Option, + pub has_geo: Option, +} + +impl FilterableAttributesAnalytics { + pub fn new(setting: Option<&BTreeSet>) -> Self { + Self { + total: setting.as_ref().map(|filter| filter.len()), + has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { filterable_attributes: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DistinctAttributeAnalytics { + pub set: bool, +} + +impl 
DistinctAttributeAnalytics { + pub fn new(distinct: Option<&String>) -> Self { + Self { set: distinct.is_some() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { distinct_attribute: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct ProximityPrecisionAnalytics { + pub set: bool, + pub value: Option, +} + +impl ProximityPrecisionAnalytics { + pub fn new(precision: Option<&ProximityPrecisionView>) -> Self { + Self { set: precision.is_some(), value: precision.cloned() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { proximity_precision: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct TypoToleranceAnalytics { + pub enabled: Option, + pub disable_on_attributes: Option, + pub disable_on_words: Option, + pub min_word_size_for_one_typo: Option, + pub min_word_size_for_two_typos: Option, +} + +impl TypoToleranceAnalytics { + pub fn new(setting: Option<&TypoSettings>) -> Self { + Self { + enabled: setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), + disable_on_attributes: setting + .as_ref() + .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), + disable_on_words: setting + .as_ref() + .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), + min_word_size_for_one_typo: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.one_typo.set())) + .flatten(), + min_word_size_for_two_typos: setting + .as_ref() + .and_then(|s| s.min_word_size_for_typos.as_ref().set().map(|s| s.two_typos.set())) + .flatten(), + } + } + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { typo_tolerance: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct FacetingAnalytics { + pub max_values_per_facet: Option, + pub sort_facet_values_by_star_count: Option, + pub sort_facet_values_by_total: Option, +} + +impl FacetingAnalytics { + pub fn new(setting: Option<&FacetingSettings>) -> Self { + Self { + max_values_per_facet: setting.as_ref().and_then(|s| s.max_values_per_facet.set()), + sort_facet_values_by_star_count: setting.as_ref().and_then(|s| { + s.sort_facet_values_by + .as_ref() + .set() + .map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) + }), + sort_facet_values_by_total: setting + .as_ref() + .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { faceting: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct PaginationAnalytics { + pub max_total_hits: Option, +} + +impl PaginationAnalytics { + pub fn new(setting: Option<&PaginationSettings>) -> Self { + Self { max_total_hits: setting.as_ref().and_then(|s| s.max_total_hits.set()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { pagination: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct StopWordsAnalytics { + pub total: Option, +} + +impl StopWordsAnalytics { + pub fn new(stop_words: Option<&BTreeSet>) -> Self { + Self { total: stop_words.as_ref().map(|stop_words| stop_words.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { stop_words: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SynonymsAnalytics { + pub total: Option, +} + +impl SynonymsAnalytics { + pub fn new(synonyms: Option<&BTreeMap>>) -> Self { + Self { total: 
synonyms.as_ref().map(|synonyms| synonyms.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { synonyms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct EmbeddersAnalytics { + // last + pub total: Option, + // Merge the sources + pub sources: Option>, + // |= + pub document_template_used: Option, + // max + pub document_template_max_bytes: Option, + // |= + pub binary_quantization_used: Option, +} + +impl EmbeddersAnalytics { + pub fn new(setting: Option<&BTreeMap>>) -> Self { + let mut sources = std::collections::HashSet::new(); + + if let Some(s) = &setting { + for source in s + .values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.source.set()) + { + use meilisearch_types::milli::vector::settings::EmbedderSource; + match source { + EmbedderSource::OpenAi => sources.insert("openAi".to_string()), + EmbedderSource::HuggingFace => sources.insert("huggingFace".to_string()), + EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), + EmbedderSource::Ollama => sources.insert("ollama".to_string()), + EmbedderSource::Rest => sources.insert("rest".to_string()), + }; + } + }; + + Self { + total: setting.as_ref().map(|s| s.len()), + sources: Some(sources), + document_template_used: setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.document_template.set().is_some()) + }), + document_template_max_bytes: setting.as_ref().and_then(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.document_template_max_bytes.set()) + .max() + }), + binary_quantization_used: setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.binary_quantized.set().is_some()) + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { embedders: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +pub struct SearchCutoffMsAnalytics { + pub search_cutoff_ms: Option, +} + +impl SearchCutoffMsAnalytics { + pub fn new(setting: Option<&u64>) -> Self { + Self { search_cutoff_ms: setting.copied() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { search_cutoff_ms: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +#[serde(transparent)] +pub struct LocalesAnalytics { + pub locales: Option>, +} + +impl LocalesAnalytics { + pub fn new(rules: Option<&Vec>) -> Self { + LocalesAnalytics { + locales: rules.as_ref().map(|rules| { + rules + .iter() + .flat_map(|rule| rule.locales.iter().cloned()) + .collect::>() + }), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { locales: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct DictionaryAnalytics { + pub total: Option, +} + +impl DictionaryAnalytics { + pub fn new(dictionary: Option<&BTreeSet>) -> Self { + Self { total: dictionary.as_ref().map(|dictionary| dictionary.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { dictionary: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct SeparatorTokensAnalytics { + pub total: Option, +} + +impl SeparatorTokensAnalytics { + pub fn new(separator_tokens: Option<&BTreeSet>) -> Self { + Self { total: separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()) } + } + + pub fn into_settings(self) -> SettingsAnalytics { + 
SettingsAnalytics { separator_tokens: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct NonSeparatorTokensAnalytics { + pub total: Option<usize>, +} + +impl NonSeparatorTokensAnalytics { + pub fn new(non_separator_tokens: Option<&BTreeSet<String>>) -> Self { + Self { + total: non_separator_tokens + .as_ref() + .map(|non_separator_tokens| non_separator_tokens.len()), + } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { non_separator_tokens: self, ..Default::default() } + } +}
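Reviewer note: everything in `settings_analytics.rs` above follows one pattern: each sub-struct measures a single setting and lifts itself into the shared event via `into_settings` plus struct-update syntax (`..Default::default()`), so only its own field of the event is populated. The route files that follow implement the new `Aggregate` trait directly instead. Its definition is not part of this diff; judging from the impls below, it presumably looks roughly like this (a sketch inferred from usage, not the actual code in the analytics module):

```rust
// Sketch only: the shape implied by the `impl Aggregate for ...` blocks below.
pub trait Aggregate: 'static {
    /// The event name this aggregate is published under.
    fn event_name(&self) -> &'static str;

    /// Fold another aggregate of the same event into this one; boxed because
    /// the analytics queue presumably stores pending aggregates as trait objects.
    fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self>;

    /// Render the accumulated counters as the JSON payload of the event.
    fn into_event(self: Box<Self>) -> serde_json::Value;
}
```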
diff --git a/meilisearch/src/routes/indexes/similar.rs b/crates/meilisearch/src/routes/indexes/similar.rs similarity index 94% rename from meilisearch/src/routes/indexes/similar.rs rename to crates/meilisearch/src/routes/indexes/similar.rs index f94a02987..79f42f0aa 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/crates/meilisearch/src/routes/indexes/similar.rs @@ -13,9 +13,10 @@ use serde_json::Value; use tracing::debug; use super::ActionPolicy; -use crate::analytics::{Analytics, SimilarAggregator}; +use crate::analytics::Analytics; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::routes::indexes::similar_analytics::{SimilarAggregator, SimilarGET, SimilarPOST}; use crate::search::{ add_search_rules, perform_similar, RankingScoreThresholdSimilar, RetrieveVectors, SearchKind, SimilarQuery, SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, @@ -34,13 +35,13 @@ pub async fn similar_get( index_uid: web::Path<String>, params: AwebQueryParameter<SimilarQueryGet, DeserrQueryParamError>, req: HttpRequest, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.0.try_into()?; - let mut aggregate = SimilarAggregator::from_query(&query, &req); + let mut aggregate = SimilarAggregator::<SimilarGET>::from_query(&query); debug!(parameters = ?query, "Similar get"); @@ -49,7 +50,7 @@ similar_get( if let Ok(similar) = &similar { aggregate.succeed(similar); } - analytics.get_similar(aggregate); + analytics.publish(aggregate, &req); let similar = similar?; @@ -62,21 +63,21 @@ pub async fn similar_post( index_uid: web::Path<String>, params: AwebJson<SimilarQuery, DeserrJsonError>, req: HttpRequest, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let index_uid = IndexUid::try_from(index_uid.into_inner())?; let query = params.into_inner(); debug!(parameters = ?query, "Similar post"); - let mut aggregate = SimilarAggregator::from_query(&query, &req); + let mut aggregate = SimilarAggregator::<SimilarPOST>::from_query(&query); let similar = similar(index_scheduler, index_uid, query).await; if let Ok(similar) = &similar { aggregate.succeed(similar); } - analytics.post_similar(aggregate); + analytics.publish(aggregate, &req); let similar = similar?; diff --git a/crates/meilisearch/src/routes/indexes/similar_analytics.rs b/crates/meilisearch/src/routes/indexes/similar_analytics.rs new file mode 100644 index 000000000..726839c3a --- /dev/null +++ b/crates/meilisearch/src/routes/indexes/similar_analytics.rs @@ -0,0 +1,233 @@ +use std::collections::{BinaryHeap, HashMap}; + +use once_cell::sync::Lazy; +use regex::Regex; +use serde_json::{json, Value}; + +use crate::aggregate_methods; +use crate::analytics::{Aggregate, AggregateMethod}; +use crate::search::{SimilarQuery, SimilarResult}; + +aggregate_methods!( + SimilarPOST => "Similar POST", + SimilarGET => "Similar GET", +); + +#[derive(Default)] +pub struct SimilarAggregator<Method: AggregateMethod> { + // requests + total_received: usize, + total_succeeded: usize, + time_spent: BinaryHeap<usize>, + + // filter + filter_with_geo_radius: bool, + filter_with_geo_bounding_box: bool, + // every time a request has a filter, this field must be incremented by the number of terms it contains + filter_sum_of_criteria_terms: usize, + // every time a request has a filter, this field must be incremented by one + filter_total_number_of_criteria: usize, + used_syntax: HashMap<String, usize>, + + // Whether a non-default embedder was specified + retrieve_vectors: bool, + + // pagination + max_limit: usize, + max_offset: usize, + + // formatting + max_attributes_to_retrieve: usize, + + // scoring + show_ranking_score: bool, + show_ranking_score_details: bool, + ranking_score_threshold: bool, + + marker: std::marker::PhantomData<Method>, +} + +impl<Method: AggregateMethod> SimilarAggregator<Method> { + #[allow(clippy::field_reassign_with_default)] + pub fn from_query(query: &SimilarQuery) -> Self { + let SimilarQuery { + id: _, + embedder: _, + offset, + limit, + attributes_to_retrieve: _, + retrieve_vectors, + show_ranking_score, + show_ranking_score_details, + filter, + ranking_score_threshold, + } = query; + + let mut ret = Self::default(); + + ret.total_received = 1; + + if let Some(ref filter) = filter { + static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap()); + ret.filter_total_number_of_criteria = 1; + + let syntax = match filter { + Value::String(_) => "string".to_string(), + Value::Array(values) => { + if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { + "mixed".to_string() + } else { + "array".to_string() + } + } + _ => "none".to_string(), + }; + // convert the string to a HashMap + ret.used_syntax.insert(syntax, 1); + + let stringified_filters = filter.to_string(); + ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); + ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); + ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); + } + + ret.max_limit = *limit; + ret.max_offset = *offset; + + ret.show_ranking_score = *show_ranking_score; + ret.show_ranking_score_details = *show_ranking_score_details; + ret.ranking_score_threshold = ranking_score_threshold.is_some(); + + ret.retrieve_vectors = *retrieve_vectors; + + ret + } + + pub fn succeed(&mut self, result: &SimilarResult) { + let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result; + + self.total_succeeded = self.total_succeeded.saturating_add(1); + + self.time_spent.push(*processing_time_ms as usize); + } +} + +impl<Method: AggregateMethod> Aggregate for SimilarAggregator<Method> { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + /// Aggregate one [SimilarAggregator] into another.
+ fn aggregate(mut self: Box<Self>, new: Box<Self>) -> Box<Self> { + let Self { + total_received, + total_succeeded, + mut time_spent, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + max_limit, + max_offset, + max_attributes_to_retrieve, + show_ranking_score, + show_ranking_score_details, + ranking_score_threshold, + retrieve_vectors, + marker: _, + } = *new; + + // request + self.total_received = self.total_received.saturating_add(total_received); + self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); + self.time_spent.append(&mut time_spent); + + // filter + self.filter_with_geo_radius |= filter_with_geo_radius; + self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; + self.filter_sum_of_criteria_terms = + self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); + self.filter_total_number_of_criteria = + self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); + for (key, value) in used_syntax.into_iter() { + let used_syntax = self.used_syntax.entry(key).or_insert(0); + *used_syntax = used_syntax.saturating_add(value); + } + + self.retrieve_vectors |= retrieve_vectors; + + // pagination + self.max_limit = self.max_limit.max(max_limit); + self.max_offset = self.max_offset.max(max_offset); + + // formatting + self.max_attributes_to_retrieve = + self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); + + // scoring + self.show_ranking_score |= show_ranking_score; + self.show_ranking_score_details |= show_ranking_score_details; + self.ranking_score_threshold |= ranking_score_threshold; + + self + } + + fn into_event(self: Box<Self>) -> serde_json::Value { + let Self { + total_received, + total_succeeded, + time_spent, + filter_with_geo_radius, + filter_with_geo_bounding_box, + filter_sum_of_criteria_terms, + filter_total_number_of_criteria, + used_syntax, + max_limit, + max_offset, + max_attributes_to_retrieve, + show_ranking_score, + show_ranking_score_details, + ranking_score_threshold, + retrieve_vectors, + marker: _, + } = *self; + + // we get all the values in a sorted manner + let time_spent = time_spent.into_sorted_vec(); + // the index of the 99th percentile value + let percentile_99th = time_spent.len() * 99 / 100; + // We are only interested in the slowest value of the 99th fastest results + let time_spent = time_spent.get(percentile_99th); + + json!({ + "requests": { + "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic + "total_received": total_received, + }, + "filter": { + "with_geoRadius": filter_with_geo_radius, + "with_geoBoundingBox": filter_with_geo_bounding_box, + "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), + "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "vector": { + "retrieve_vectors": retrieve_vectors, + }, + "pagination": { + "max_limit": max_limit, + "max_offset": max_offset, + }, + "formatting": { + "max_attributes_to_retrieve": max_attributes_to_retrieve, + }, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + "ranking_score_threshold": ranking_score_threshold, + } + }) + } +}
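Reviewer note: the `SimilarGET`/`SimilarPOST` markers consumed above (and `CancelTasks`/`DeleteTasks` in `tasks.rs` further down) come from the new `aggregate_methods!` macro, whose definition is not shown in this diff. Presumably it emits one zero-sized marker type per entry, roughly like this sketch:

```rust
// Sketch of what `aggregate_methods!(SimilarPOST => "Similar POST", ...)`
// presumably expands to; the real macro lives in the analytics module.
#[derive(Default)]
pub struct SimilarPOST;

impl AggregateMethod for SimilarPOST {
    fn event_name() -> &'static str {
        "Similar POST"
    }
}
```

This is what lets a single `SimilarAggregator<Method>` report under two distinct event names: the GET and POST variants differ only in the `PhantomData<Method>` marker, so they aggregate into separate events without duplicating the struct.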
diff --git a/meilisearch/src/routes/logs.rs b/crates/meilisearch/src/routes/logs.rs similarity index 100% rename from meilisearch/src/routes/logs.rs rename to crates/meilisearch/src/routes/logs.rs diff --git a/meilisearch/src/routes/metrics.rs b/crates/meilisearch/src/routes/metrics.rs similarity index 100% rename from meilisearch/src/routes/metrics.rs rename to crates/meilisearch/src/routes/metrics.rs diff --git a/meilisearch/src/routes/mod.rs b/crates/meilisearch/src/routes/mod.rs similarity index 99% rename from meilisearch/src/routes/mod.rs rename to crates/meilisearch/src/routes/mod.rs index c25aeee70..b7260ea08 100644 --- a/meilisearch/src/routes/mod.rs +++ b/crates/meilisearch/src/routes/mod.rs @@ -25,6 +25,7 @@ pub mod indexes; mod logs; mod metrics; mod multi_search; +mod multi_search_analytics; mod snapshot; mod swap_indexes; pub mod tasks; diff --git a/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs similarity index 96% rename from meilisearch/src/routes/multi_search.rs rename to crates/meilisearch/src/routes/multi_search.rs index 5fcb868c6..f8b1bc6ee 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -9,7 +9,8 @@ use meilisearch_types::keys::actions; use serde::Serialize; use tracing::debug; -use crate::analytics::{Analytics, MultiSearchAggregator}; +use super::multi_search_analytics::MultiSearchAggregator; +use crate::analytics::Analytics; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::{AuthenticationError, GuardedData}; @@ -35,7 +36,7 @@ pub async fn multi_search_with_post( search_queue: Data<SearchQueue>, params: AwebJson<FederatedSearch, DeserrJsonError>, req: HttpRequest, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { // Since we don't want to process half of the search requests and then get a permit refused // we're going to get one permit for the whole duration of the multi-search request. @@ -43,7 +44,7 @@ pub async fn multi_search_with_post( let federated_search = params.into_inner(); - let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req); + let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search); let FederatedSearch { mut queries, federation } = federated_search; @@ -87,7 +88,7 @@ pub async fn multi_search_with_post( multi_aggregate.succeed(); } - analytics.post_multi_search(multi_aggregate); + analytics.publish(multi_aggregate, &req); HttpResponse::Ok().json(search_result??) } None => { @@ -149,7 +150,7 @@ pub async fn multi_search_with_post( if search_results.is_ok() { multi_aggregate.succeed(); } - analytics.post_multi_search(multi_aggregate); + analytics.publish(multi_aggregate, &req); let search_results = search_results.map_err(|(mut err, query_index)| { // Add the query index that failed as context for the error message.
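Reviewer note on the 99th-percentile lookup in `SimilarAggregator::into_event` above: `into_sorted_vec()` yields ascending times and the code indexes at `len() * 99 / 100`. A quick sanity check of that arithmetic with made-up values:

```rust
fn main() {
    // 200 response times, ascending, as `into_sorted_vec()` would return them.
    let time_spent: Vec<usize> = (1..=200).collect();
    // 200 * 99 / 100 = 198 (integer division).
    let percentile_99th = time_spent.len() * 99 / 100;
    // Index 198 holds the value 199: only the single slowest sample sits above it.
    assert_eq!(time_spent[percentile_99th], 199);
}
```

Using `time_spent.get(percentile_99th)` rather than indexing also keeps the empty case (no succeeded requests yet) a `None` instead of a panic.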
diff --git a/crates/meilisearch/src/routes/multi_search_analytics.rs b/crates/meilisearch/src/routes/multi_search_analytics.rs new file mode 100644 index 000000000..3d07f471c --- /dev/null +++ b/crates/meilisearch/src/routes/multi_search_analytics.rs @@ -0,0 +1,168 @@ +use std::collections::HashSet; + +use serde_json::json; + +use crate::analytics::Aggregate; +use crate::search::{FederatedSearch, SearchQueryWithIndex}; + +#[derive(Default)] +pub struct MultiSearchAggregator { + // requests + total_received: usize, + total_succeeded: usize, + + // sum of the number of distinct indexes in each single request, use with total_received to compute an avg + total_distinct_index_count: usize, + // number of queries with a single index, use with total_received to compute a proportion + total_single_index: usize, + + // sum of the number of search queries in the requests, use with total_received to compute an average + total_search_count: usize, + + // scoring + show_ranking_score: bool, + show_ranking_score_details: bool, + + // federation + use_federation: bool, +} + +impl MultiSearchAggregator { + pub fn from_federated_search(federated_search: &FederatedSearch) -> Self { + let use_federation = federated_search.federation.is_some(); + + let distinct_indexes: HashSet<_> = federated_search + .queries + .iter() + .map(|query| { + let query = &query; + // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex + let SearchQueryWithIndex { + index_uid, + federation_options: _, + q: _, + vector: _, + offset: _, + limit: _, + page: _, + hits_per_page: _, + attributes_to_retrieve: _, + retrieve_vectors: _, + attributes_to_crop: _, + crop_length: _, + attributes_to_highlight: _, + show_ranking_score: _, + show_ranking_score_details: _, + show_matches_position: _, + filter: _, + sort: _, + distinct: _, + facets: _, + highlight_pre_tag: _, + highlight_post_tag: _, + crop_marker: _, + matching_strategy: _, + attributes_to_search_on: _, + hybrid: _, + ranking_score_threshold: _, + locales: _, + } = query; + + index_uid.as_str() + }) + .collect(); + + let show_ranking_score = + federated_search.queries.iter().any(|query| query.show_ranking_score); + let show_ranking_score_details = + federated_search.queries.iter().any(|query| query.show_ranking_score_details); + + Self { + total_received: 1, + total_succeeded: 0, + total_distinct_index_count: distinct_indexes.len(), + total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, + total_search_count: federated_search.queries.len(), + show_ranking_score, + show_ranking_score_details, + use_federation, + } + } + + pub fn succeed(&mut self) { + self.total_succeeded = self.total_succeeded.saturating_add(1); + } +} + +impl Aggregate for MultiSearchAggregator { + fn event_name(&self) -> &'static str { + "Documents Searched by Multi-Search POST" + } + + /// Aggregate one [MultiSearchAggregator] into another. + fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> { + // write the aggregate in a way that will cause a compilation error if a field is added. + + // get ownership of self, replacing it by a default value.
+ let this = *self; + + let total_received = this.total_received.saturating_add(new.total_received); + let total_succeeded = this.total_succeeded.saturating_add(new.total_succeeded); + let total_distinct_index_count = + this.total_distinct_index_count.saturating_add(new.total_distinct_index_count); + let total_single_index = this.total_single_index.saturating_add(new.total_single_index); + let total_search_count = this.total_search_count.saturating_add(new.total_search_count); + let show_ranking_score = this.show_ranking_score || new.show_ranking_score; + let show_ranking_score_details = + this.show_ranking_score_details || new.show_ranking_score_details; + let use_federation = this.use_federation || new.use_federation; + + Box::new(Self { + total_received, + total_succeeded, + total_distinct_index_count, + total_single_index, + total_search_count, + show_ranking_score, + show_ranking_score_details, + use_federation, + }) + } + + fn into_event(self: Box<Self>) -> serde_json::Value { + let Self { + total_received, + total_succeeded, + total_distinct_index_count, + total_single_index, + total_search_count, + show_ranking_score, + show_ranking_score_details, + use_federation, + } = *self; + + json!({ + "requests": { + "total_succeeded": total_succeeded, + "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic + "total_received": total_received, + }, + "indexes": { + "total_single_index": total_single_index, + "total_distinct_index_count": total_distinct_index_count, + "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0 else returned early + }, + "searches": { + "total_search_count": total_search_count, + "avg_search_count": (total_search_count as f64) / (total_received as f64), + }, + "scoring": { + "show_ranking_score": show_ranking_score, + "show_ranking_score_details": show_ranking_score_details, + }, + "federation": { + "use_federation": use_federation, + } + }) + } +} diff --git a/meilisearch/src/routes/snapshot.rs b/crates/meilisearch/src/routes/snapshot.rs similarity index 88% rename from meilisearch/src/routes/snapshot.rs rename to crates/meilisearch/src/routes/snapshot.rs index 84673729f..cacbc41af 100644 --- a/meilisearch/src/routes/snapshot.rs +++ b/crates/meilisearch/src/routes/snapshot.rs @@ -3,7 +3,6 @@ use actix_web::{web, HttpRequest, HttpResponse}; use index_scheduler::IndexScheduler; use meilisearch_types::error::ResponseError; use meilisearch_types::tasks::KindWithContent; -use serde_json::json; use tracing::debug; use crate::analytics::Analytics; @@ -17,13 +16,15 @@ pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot)))); } +crate::empty_analytics!(SnapshotAnalytics, "Snapshot Created"); + pub async fn create_snapshot( index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>, req: HttpRequest, opt: web::Data<Opt>, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { - analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req)); + analytics.publish(SnapshotAnalytics::default(), &req); let task = KindWithContent::SnapshotCreation; let uid = get_task_id(&req, &opt)?;
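Reviewer note: `crate::empty_analytics!(SnapshotAnalytics, "Snapshot Created");` replaces the stringly-typed `publish("Snapshot Created".to_string(), json!({}), Some(&req))` call. The macro itself is outside this diff; for an event that carries no payload it presumably generates something along these lines (sketch under that assumption):

```rust
// Sketch of what `empty_analytics!(SnapshotAnalytics, "Snapshot Created")`
// presumably expands to: an aggregate with a fixed name and an empty payload.
#[derive(Default)]
struct SnapshotAnalytics;

impl Aggregate for SnapshotAnalytics {
    fn event_name(&self) -> &'static str {
        "Snapshot Created"
    }

    // Merging two empty events is a no-op; keep either one.
    fn aggregate(self: Box<Self>, _new: Box<Self>) -> Box<Self> {
        self
    }

    fn into_event(self: Box<Self>) -> serde_json::Value {
        serde_json::json!({})
    }
}
```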
diff --git a/meilisearch/src/routes/swap_indexes.rs b/crates/meilisearch/src/routes/swap_indexes.rs similarity index 77% rename from meilisearch/src/routes/swap_indexes.rs rename to crates/meilisearch/src/routes/swap_indexes.rs index 51a7b0707..9b8b67e63 100644 --- a/meilisearch/src/routes/swap_indexes.rs +++ b/crates/meilisearch/src/routes/swap_indexes.rs @@ -8,10 +8,10 @@ use meilisearch_types::error::deserr_codes::InvalidSwapIndexes; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::tasks::{IndexSwap, KindWithContent}; -use serde_json::json; +use serde::Serialize; use super::{get_task_id, is_dry_run, SummarizedTaskView}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, Analytics}; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::{AuthenticationError, GuardedData}; @@ -29,21 +29,36 @@ pub struct SwapIndexesPayload { indexes: Vec<IndexUid>, } +#[derive(Serialize)] +struct IndexSwappedAnalytics { + swap_operation_number: usize, +} + +impl Aggregate for IndexSwappedAnalytics { + fn event_name(&self) -> &'static str { + "Indexes Swapped" + } + + fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> { + Box::new(Self { + swap_operation_number: self.swap_operation_number.max(new.swap_operation_number), + }) + } + + fn into_event(self: Box<Self>) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + pub async fn swap_indexes( index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>, params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>, req: HttpRequest, opt: web::Data<Opt>, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let params = params.into_inner(); - analytics.publish( - "Indexes Swapped".to_string(), - json!({ - "swap_operation_number": params.len(), - }), - Some(&req), - ); + analytics.publish(IndexSwappedAnalytics { swap_operation_number: params.len() }, &req); let filters = index_scheduler.filters(); let mut swaps = vec![]; diff --git a/meilisearch/src/routes/tasks.rs b/crates/meilisearch/src/routes/tasks.rs similarity index 88% rename from meilisearch/src/routes/tasks.rs rename to crates/meilisearch/src/routes/tasks.rs index 3dc6520af..95959d6d5 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/crates/meilisearch/src/routes/tasks.rs @@ -12,18 +12,17 @@ use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList}; use meilisearch_types::task_view::TaskView; use meilisearch_types::tasks::{Kind, KindWithContent, Status}; use serde::Serialize; -use serde_json::json; use time::format_description::well_known::Rfc3339; use time::macros::format_description; use time::{Date, Duration, OffsetDateTime, Time}; use tokio::task; use super::{get_task_id, is_dry_run, SummarizedTaskView}; -use crate::analytics::Analytics; +use crate::analytics::{Aggregate, AggregateMethod, Analytics}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::Opt; +use crate::{aggregate_methods, Opt}; const DEFAULT_LIMIT: u32 = 20; @@ -158,12 +157,69 @@ impl TaskDeletionOrCancelationQuery { } } +aggregate_methods!( + CancelTasks => "Tasks Canceled", + DeleteTasks => "Tasks Deleted", +); + +#[derive(Serialize)] +struct TaskFilterAnalytics<Method: AggregateMethod> { + filtered_by_uid: bool, + filtered_by_index_uid: bool, + filtered_by_type: bool, + filtered_by_status: bool, + filtered_by_canceled_by: bool, + filtered_by_before_enqueued_at: bool, + filtered_by_after_enqueued_at: bool, + filtered_by_before_started_at: bool, + filtered_by_after_started_at: bool, + filtered_by_before_finished_at: bool, + filtered_by_after_finished_at: bool, + + #[serde(skip)] + marker: std::marker::PhantomData<Method>, +} + +impl<Method: AggregateMethod> Aggregate for TaskFilterAnalytics<Method> { + fn event_name(&self) -> &'static str { + Method::event_name() + } + + fn aggregate(self: Box<Self>, new: Box<Self>) -> Box<Self> {
Box::new(Self { + filtered_by_uid: self.filtered_by_uid | new.filtered_by_uid, + filtered_by_index_uid: self.filtered_by_index_uid | new.filtered_by_index_uid, + filtered_by_type: self.filtered_by_type | new.filtered_by_type, + filtered_by_status: self.filtered_by_status | new.filtered_by_status, + filtered_by_canceled_by: self.filtered_by_canceled_by | new.filtered_by_canceled_by, + filtered_by_before_enqueued_at: self.filtered_by_before_enqueued_at | new.filtered_by_before_enqueued_at, + filtered_by_after_enqueued_at: self.filtered_by_after_enqueued_at | new.filtered_by_after_enqueued_at, + filtered_by_before_started_at: self.filtered_by_before_started_at | new.filtered_by_before_started_at, + filtered_by_after_started_at: self.filtered_by_after_started_at | new.filtered_by_after_started_at, + filtered_by_before_finished_at: self.filtered_by_before_finished_at | new.filtered_by_before_finished_at, + filtered_by_after_finished_at: self.filtered_by_after_finished_at | new.filtered_by_after_finished_at, + + marker: std::marker::PhantomData, + }) + } + + fn into_event(self: Box<Self>) -> serde_json::Value { + serde_json::to_value(*self).unwrap_or_default() + } +} + async fn cancel_tasks( index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>, params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>, req: HttpRequest, opt: web::Data<Opt>, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let params = params.into_inner(); @@ -172,21 +228,22 @@ async fn cancel_tasks( } analytics.publish( - "Tasks Canceled".to_string(), - json!({ - "filtered_by_uid": params.uids.is_some(), - "filtered_by_index_uid": params.index_uids.is_some(), - "filtered_by_type": params.types.is_some(), - "filtered_by_status": params.statuses.is_some(), - "filtered_by_canceled_by": params.canceled_by.is_some(), - "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(), - "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(), - "filtered_by_before_started_at": params.before_started_at.is_some(), - "filtered_by_after_started_at": params.after_started_at.is_some(), - "filtered_by_before_finished_at": params.before_finished_at.is_some(), - "filtered_by_after_finished_at": params.after_finished_at.is_some(), - }), - Some(&req), + TaskFilterAnalytics::<CancelTasks> { + filtered_by_uid: params.uids.is_some(), + filtered_by_index_uid: params.index_uids.is_some(), + filtered_by_type: params.types.is_some(), + filtered_by_status: params.statuses.is_some(), + filtered_by_canceled_by: params.canceled_by.is_some(), + filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(), + filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(), + filtered_by_before_started_at: params.before_started_at.is_some(), + filtered_by_after_started_at: params.after_started_at.is_some(), + filtered_by_before_finished_at: params.before_finished_at.is_some(), + filtered_by_after_finished_at: params.after_finished_at.is_some(), + + marker: std::marker::PhantomData, + }, + &req, ); let query = params.into_query(); @@ -214,7 +271,7 @@ async fn delete_tasks( params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>, req: HttpRequest, opt: web::Data<Opt>, - analytics: web::Data<dyn Analytics>, + analytics: web::Data<Analytics>, ) -> Result<HttpResponse, ResponseError> { let params = params.into_inner(); @@ -223,22 +280,24 @@ analytics.publish( - "Tasks Deleted".to_string(), - json!({ - "filtered_by_uid": params.uids.is_some(), - "filtered_by_index_uid": params.index_uids.is_some(), - "filtered_by_type": params.types.is_some(), - "filtered_by_status": params.statuses.is_some(), - "filtered_by_canceled_by":
params.canceled_by.is_some(), - "filtered_by_before_enqueued_at": params.before_enqueued_at.is_some(), - "filtered_by_after_enqueued_at": params.after_enqueued_at.is_some(), - "filtered_by_before_started_at": params.before_started_at.is_some(), - "filtered_by_after_started_at": params.after_started_at.is_some(), - "filtered_by_before_finished_at": params.before_finished_at.is_some(), - "filtered_by_after_finished_at": params.after_finished_at.is_some(), - }), - Some(&req), + TaskFilterAnalytics::<DeleteTasks> { + filtered_by_uid: params.uids.is_some(), + filtered_by_index_uid: params.index_uids.is_some(), + filtered_by_type: params.types.is_some(), + filtered_by_status: params.statuses.is_some(), + filtered_by_canceled_by: params.canceled_by.is_some(), + filtered_by_before_enqueued_at: params.before_enqueued_at.is_some(), + filtered_by_after_enqueued_at: params.after_enqueued_at.is_some(), + filtered_by_before_started_at: params.before_started_at.is_some(), + filtered_by_after_started_at: params.after_started_at.is_some(), + filtered_by_before_finished_at: params.before_finished_at.is_some(), + filtered_by_after_finished_at: params.after_finished_at.is_some(), + + marker: std::marker::PhantomData, + }, + &req, ); + let query = params.into_query(); let (tasks, _) = index_scheduler.get_task_ids_from_authorized_indexes( @@ -616,7 +675,7 @@ mod tests { let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err(); snapshot!(meili_snap::json_string!(err), @r###" { - "message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -628,7 +687,7 @@ let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err(); snapshot!(meili_snap::json_string!(err), @r###" { - "message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid.
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid"
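Reviewer note: a trick recurring through these aggregators (`SimilarAggregator::aggregate`, the "compilation error if a field is added" comments in `multi_search_analytics.rs`) is to destructure the struct exhaustively instead of reading fields ad hoc: with no `..` rest pattern, adding a field refuses to compile until every merge site handles it. A minimal self-contained illustration (`Stats` and its fields are invented for the example):

```rust
// `Stats` stands in for the aggregator structs above.
struct Stats {
    hits: usize,
    misses: usize,
}

fn merge(old: Stats, new: Stats) -> Stats {
    // Exhaustive destructuring: the moment a field is added to `Stats`,
    // this pattern stops compiling and forces `merge` to be updated.
    let Stats { hits, misses } = new;
    Stats { hits: old.hits.saturating_add(hits), misses: old.misses.saturating_add(misses) }
}

fn main() {
    let merged = merge(Stats { hits: 1, misses: 0 }, Stats { hits: 2, misses: 3 });
    assert_eq!((merged.hits, merged.misses), (3, 3));
}
```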
diff --git a/meilisearch/src/search/federated.rs b/crates/meilisearch/src/search/federated.rs similarity index 100% rename from meilisearch/src/search/federated.rs rename to crates/meilisearch/src/search/federated.rs diff --git a/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs similarity index 99% rename from meilisearch/src/search/mod.rs rename to crates/meilisearch/src/search/mod.rs index 4e25f3a1a..c873ab387 100644 --- a/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -1195,8 +1195,13 @@ impl<'a> HitMaker<'a> { let vectors_is_hidden = match (&displayed_ids, vectors_fid) { // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid (None, _) => false, - // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field - (Some(_), None) => true, + // vectors has no fid, so check its explicit name + (Some(_), None) => { + // unwrap as otherwise we'd go to the first one + let displayed_names = index.displayed_fields(rtxn)?.unwrap(); + !displayed_names + .contains(&milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME) + } + // displayed_ids is a finite list, so hide if `_vectors` is not part of it + (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid), }; diff --git a/meilisearch/src/search/ranking_rules.rs b/crates/meilisearch/src/search/ranking_rules.rs similarity index 100% rename from meilisearch/src/search/ranking_rules.rs rename to crates/meilisearch/src/search/ranking_rules.rs diff --git a/meilisearch/src/search_queue.rs b/crates/meilisearch/src/search_queue.rs similarity index 100% rename from meilisearch/src/search_queue.rs rename to crates/meilisearch/src/search_queue.rs diff --git a/meilisearch/tests/assets/dumps/v1/metadata.json b/crates/meilisearch/tests/assets/dumps/v1/metadata.json similarity index 100% rename from meilisearch/tests/assets/dumps/v1/metadata.json rename to crates/meilisearch/tests/assets/dumps/v1/metadata.json diff --git a/meilisearch/tests/assets/dumps/v1/test/documents.jsonl b/crates/meilisearch/tests/assets/dumps/v1/test/documents.jsonl similarity index 100% rename from meilisearch/tests/assets/dumps/v1/test/documents.jsonl rename to crates/meilisearch/tests/assets/dumps/v1/test/documents.jsonl diff --git a/meilisearch/tests/assets/dumps/v1/test/settings.json b/crates/meilisearch/tests/assets/dumps/v1/test/settings.json similarity index 100% rename from meilisearch/tests/assets/dumps/v1/test/settings.json rename to crates/meilisearch/tests/assets/dumps/v1/test/settings.json diff --git a/meilisearch/tests/assets/dumps/v1/test/updates.jsonl b/crates/meilisearch/tests/assets/dumps/v1/test/updates.jsonl similarity index 100% rename from meilisearch/tests/assets/dumps/v1/test/updates.jsonl rename to crates/meilisearch/tests/assets/dumps/v1/test/updates.jsonl diff --git a/meilisearch/tests/assets/test_set.json b/crates/meilisearch/tests/assets/test_set.json similarity index 100% rename from meilisearch/tests/assets/test_set.json rename to crates/meilisearch/tests/assets/test_set.json diff --git a/meilisearch/tests/assets/test_set.ndjson b/crates/meilisearch/tests/assets/test_set.ndjson similarity index 100% rename from meilisearch/tests/assets/test_set.ndjson rename to crates/meilisearch/tests/assets/test_set.ndjson diff --git a/meilisearch/tests/assets/v1_v0.20.0_movies.dump b/crates/meilisearch/tests/assets/v1_v0.20.0_movies.dump similarity index 100% rename from meilisearch/tests/assets/v1_v0.20.0_movies.dump rename to crates/meilisearch/tests/assets/v1_v0.20.0_movies.dump diff --git a/meilisearch/tests/assets/v1_v0.20.0_movies_with_settings.dump b/crates/meilisearch/tests/assets/v1_v0.20.0_movies_with_settings.dump similarity index 100% rename from meilisearch/tests/assets/v1_v0.20.0_movies_with_settings.dump rename to crates/meilisearch/tests/assets/v1_v0.20.0_movies_with_settings.dump diff --git a/meilisearch/tests/assets/v1_v0.20.0_rubygems_with_settings.dump b/crates/meilisearch/tests/assets/v1_v0.20.0_rubygems_with_settings.dump similarity index 100% rename from meilisearch/tests/assets/v1_v0.20.0_rubygems_with_settings.dump rename to crates/meilisearch/tests/assets/v1_v0.20.0_rubygems_with_settings.dump diff --git a/meilisearch/tests/assets/v2_v0.21.1_movies.dump b/crates/meilisearch/tests/assets/v2_v0.21.1_movies.dump similarity index 100% rename from meilisearch/tests/assets/v2_v0.21.1_movies.dump rename to crates/meilisearch/tests/assets/v2_v0.21.1_movies.dump diff --git a/meilisearch/tests/assets/v2_v0.21.1_movies_with_settings.dump b/crates/meilisearch/tests/assets/v2_v0.21.1_movies_with_settings.dump similarity index 100% rename from meilisearch/tests/assets/v2_v0.21.1_movies_with_settings.dump rename to crates/meilisearch/tests/assets/v2_v0.21.1_movies_with_settings.dump diff --git a/meilisearch/tests/assets/v2_v0.21.1_rubygems_with_settings.dump b/crates/meilisearch/tests/assets/v2_v0.21.1_rubygems_with_settings.dump similarity index 100% rename from meilisearch/tests/assets/v2_v0.21.1_rubygems_with_settings.dump rename to crates/meilisearch/tests/assets/v2_v0.21.1_rubygems_with_settings.dump diff --git a/meilisearch/tests/assets/v3_v0.24.0_movies.dump b/crates/meilisearch/tests/assets/v3_v0.24.0_movies.dump similarity index 100% rename from meilisearch/tests/assets/v3_v0.24.0_movies.dump rename to crates/meilisearch/tests/assets/v3_v0.24.0_movies.dump diff --git a/meilisearch/tests/assets/v3_v0.24.0_movies_with_settings.dump b/crates/meilisearch/tests/assets/v3_v0.24.0_movies_with_settings.dump similarity index 100% rename from meilisearch/tests/assets/v3_v0.24.0_movies_with_settings.dump rename to crates/meilisearch/tests/assets/v3_v0.24.0_movies_with_settings.dump diff --git a/meilisearch/tests/assets/v3_v0.24.0_rubygems_with_settings.dump b/crates/meilisearch/tests/assets/v3_v0.24.0_rubygems_with_settings.dump similarity index 100% rename from meilisearch/tests/assets/v3_v0.24.0_rubygems_with_settings.dump rename to crates/meilisearch/tests/assets/v3_v0.24.0_rubygems_with_settings.dump diff --git a/meilisearch/tests/assets/v4_v0.25.2_movies.dump b/crates/meilisearch/tests/assets/v4_v0.25.2_movies.dump similarity index 100% rename from meilisearch/tests/assets/v4_v0.25.2_movies.dump rename to crates/meilisearch/tests/assets/v4_v0.25.2_movies.dump diff --git a/meilisearch/tests/assets/v4_v0.25.2_movies_with_settings.dump b/crates/meilisearch/tests/assets/v4_v0.25.2_movies_with_settings.dump similarity index 100% rename from meilisearch/tests/assets/v4_v0.25.2_movies_with_settings.dump rename to crates/meilisearch/tests/assets/v4_v0.25.2_movies_with_settings.dump diff --git a/meilisearch/tests/assets/v4_v0.25.2_rubygems_with_settings.dump b/crates/meilisearch/tests/assets/v4_v0.25.2_rubygems_with_settings.dump similarity index 100%
rename from meilisearch/tests/assets/v4_v0.25.2_rubygems_with_settings.dump rename to crates/meilisearch/tests/assets/v4_v0.25.2_rubygems_with_settings.dump diff --git a/meilisearch/tests/assets/v5_v0.28.0_test_dump.dump b/crates/meilisearch/tests/assets/v5_v0.28.0_test_dump.dump similarity index 100% rename from meilisearch/tests/assets/v5_v0.28.0_test_dump.dump rename to crates/meilisearch/tests/assets/v5_v0.28.0_test_dump.dump diff --git a/meilisearch/tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump b/crates/meilisearch/tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump similarity index 100% rename from meilisearch/tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump rename to crates/meilisearch/tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump diff --git a/meilisearch/tests/auth/api_keys.rs b/crates/meilisearch/tests/auth/api_keys.rs similarity index 100% rename from meilisearch/tests/auth/api_keys.rs rename to crates/meilisearch/tests/auth/api_keys.rs diff --git a/meilisearch/tests/auth/authorization.rs b/crates/meilisearch/tests/auth/authorization.rs similarity index 100% rename from meilisearch/tests/auth/authorization.rs rename to crates/meilisearch/tests/auth/authorization.rs diff --git a/meilisearch/tests/auth/errors.rs b/crates/meilisearch/tests/auth/errors.rs similarity index 100% rename from meilisearch/tests/auth/errors.rs rename to crates/meilisearch/tests/auth/errors.rs diff --git a/meilisearch/tests/auth/mod.rs b/crates/meilisearch/tests/auth/mod.rs similarity index 100% rename from meilisearch/tests/auth/mod.rs rename to crates/meilisearch/tests/auth/mod.rs diff --git a/meilisearch/tests/auth/payload.rs b/crates/meilisearch/tests/auth/payload.rs similarity index 100% rename from meilisearch/tests/auth/payload.rs rename to crates/meilisearch/tests/auth/payload.rs diff --git a/meilisearch/tests/auth/tenant_token.rs b/crates/meilisearch/tests/auth/tenant_token.rs similarity index 100% rename from meilisearch/tests/auth/tenant_token.rs rename to crates/meilisearch/tests/auth/tenant_token.rs diff --git a/meilisearch/tests/auth/tenant_token_multi_search.rs b/crates/meilisearch/tests/auth/tenant_token_multi_search.rs similarity index 100% rename from meilisearch/tests/auth/tenant_token_multi_search.rs rename to crates/meilisearch/tests/auth/tenant_token_multi_search.rs diff --git a/meilisearch/tests/common/encoder.rs b/crates/meilisearch/tests/common/encoder.rs similarity index 100% rename from meilisearch/tests/common/encoder.rs rename to crates/meilisearch/tests/common/encoder.rs diff --git a/meilisearch/tests/common/index.rs b/crates/meilisearch/tests/common/index.rs similarity index 96% rename from meilisearch/tests/common/index.rs rename to crates/meilisearch/tests/common/index.rs index 381bd1cb4..221333fd7 100644 --- a/meilisearch/tests/common/index.rs +++ b/crates/meilisearch/tests/common/index.rs @@ -9,8 +9,7 @@ use urlencoding::encode as urlencode; use super::encoder::Encoder; use super::service::Service; -use super::Value; -use super::{Owned, Shared}; +use super::{Owned, Shared, Value}; use crate::json; pub struct Index<'a, State = Owned> { @@ -272,6 +271,20 @@ impl<'a> Index<'a, Shared> { } (task, code) } + + pub async fn delete_index_fail(&self) -> (Value, StatusCode) { + let (mut task, code) = self._delete().await; + if code.is_success() { + task = self.wait_task(task.uid()).await; + if task.is_success() { + panic!( + "`delete_index_fail` succeeded: {}", + serde_json::to_string_pretty(&task).unwrap() + ); + } 
+ } + (task, code) + } } #[allow(dead_code)] impl<State> Index<'_, State> { @@ -314,6 +327,12 @@ }); self.service.post_encoded("/indexes", body, self.encoder).await } + + pub(super) async fn _delete(&self) -> (Value, StatusCode) { + let url = format!("/indexes/{}", urlencode(self.uid.as_ref())); + self.service.delete(url).await + } + pub async fn wait_task(&self, update_id: u64) -> Value { // try several times to get status, or panic to not wait forever let url = format!("/tasks/{}", update_id); diff --git a/meilisearch/tests/common/mod.rs b/crates/meilisearch/tests/common/mod.rs similarity index 100% rename from meilisearch/tests/common/mod.rs rename to crates/meilisearch/tests/common/mod.rs diff --git a/meilisearch/tests/common/server.rs b/crates/meilisearch/tests/common/server.rs similarity index 98% rename from meilisearch/tests/common/server.rs rename to crates/meilisearch/tests/common/server.rs index 6d331ebbc..5069c9ea6 100644 --- a/meilisearch/tests/common/server.rs +++ b/crates/meilisearch/tests/common/server.rs @@ -309,6 +309,11 @@ impl Server { } } + pub fn unique_index_with_encoder(&self, encoder: Encoder) -> Index<'_> { + let uuid = Uuid::new_v4(); + Index { uid: uuid.to_string(), service: &self.service, encoder, marker: PhantomData } + } + pub(super) async fn _create_index(&self, body: Value) -> (Value, StatusCode) { self.service.post("/indexes", body).await } @@ -381,7 +386,6 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt { db_path: dir.as_ref().join("db"), dump_dir: dir.as_ref().join("dumps"), env: "development".to_owned(), - #[cfg(feature = "analytics")] no_analytics: true, max_index_size: Byte::from_u64_with_unit(100, Unit::MiB).unwrap(), max_task_db_size: Byte::from_u64_with_unit(1, Unit::GiB).unwrap(), diff --git a/meilisearch/tests/common/service.rs b/crates/meilisearch/tests/common/service.rs similarity index 97% rename from meilisearch/tests/common/service.rs rename to crates/meilisearch/tests/common/service.rs index 8addbacf8..c0b07c217 100644 --- a/meilisearch/tests/common/service.rs +++ b/crates/meilisearch/tests/common/service.rs @@ -9,8 +9,9 @@ use actix_web::test; use actix_web::test::TestRequest; use actix_web::web::Data; use index_scheduler::IndexScheduler; +use meilisearch::analytics::Analytics; use meilisearch::search_queue::SearchQueue; -use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; +use meilisearch::{create_app, Opt, SubscriberForSecondLayer}; use meilisearch_auth::AuthController; use tracing::level_filters::LevelFilter; use tracing_subscriber::Layer; @@ -141,7 +142,7 @@ impl Service { Data::new(search_queue), self.options.clone(), (route_layer_handle, stderr_layer_handle), - analytics::MockAnalytics::new(&self.options), + Data::new(Analytics::no_analytics()), true, )) .await diff --git a/meilisearch/tests/content_type.rs b/crates/meilisearch/tests/content_type.rs similarity index 100% rename from meilisearch/tests/content_type.rs rename to crates/meilisearch/tests/content_type.rs diff --git a/meilisearch/tests/dashboard/mod.rs b/crates/meilisearch/tests/dashboard/mod.rs similarity index 100% rename from meilisearch/tests/dashboard/mod.rs rename to crates/meilisearch/tests/dashboard/mod.rs diff --git a/meilisearch/tests/documents/add_documents.rs b/crates/meilisearch/tests/documents/add_documents.rs similarity index 99% rename from meilisearch/tests/documents/add_documents.rs rename to crates/meilisearch/tests/documents/add_documents.rs index 819b2ddc2..c37b3a5e3 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ 
b/crates/meilisearch/tests/documents/add_documents.rs @@ -1023,7 +1023,7 @@ async fn error_document_add_create_index_bad_uid() { snapshot!(json_string!(response), @r###" { - "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -1280,7 +1280,7 @@ async fn error_add_documents_bad_document_id() { "indexedDocuments": 0 }, "error": { - "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", + "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_document_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_id" diff --git a/meilisearch/tests/documents/delete_documents.rs b/crates/meilisearch/tests/documents/delete_documents.rs similarity index 100% rename from meilisearch/tests/documents/delete_documents.rs rename to crates/meilisearch/tests/documents/delete_documents.rs diff --git a/meilisearch/tests/documents/errors.rs b/crates/meilisearch/tests/documents/errors.rs similarity index 100% rename from meilisearch/tests/documents/errors.rs rename to crates/meilisearch/tests/documents/errors.rs diff --git a/meilisearch/tests/documents/get_documents.rs b/crates/meilisearch/tests/documents/get_documents.rs similarity index 100% rename from meilisearch/tests/documents/get_documents.rs rename to crates/meilisearch/tests/documents/get_documents.rs diff --git a/meilisearch/tests/documents/mod.rs b/crates/meilisearch/tests/documents/mod.rs similarity index 100% rename from meilisearch/tests/documents/mod.rs rename to crates/meilisearch/tests/documents/mod.rs diff --git a/meilisearch/tests/documents/update_documents.rs b/crates/meilisearch/tests/documents/update_documents.rs similarity index 76% rename from meilisearch/tests/documents/update_documents.rs rename to crates/meilisearch/tests/documents/update_documents.rs index a5d466513..c0703e81b 100644 --- a/meilisearch/tests/documents/update_documents.rs +++ b/crates/meilisearch/tests/documents/update_documents.rs @@ -11,7 +11,7 @@ async fn error_document_update_create_index_bad_uid() { let (response, code) = index.update_documents(json!([{"id": 1}]), None).await; let expected_response = json!({ - "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`883 fj!` is not a valid index uid. 
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -23,8 +23,8 @@ async fn error_document_update_create_index_bad_uid() { #[actix_rt::test] async fn document_update_with_primary_key() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = json!([ { @@ -32,15 +32,14 @@ async fn document_update_with_primary_key() { "content": "foo", } ]); - let (_response, code) = index.update_documents(documents, Some("primary")).await; + let (response, code) = index.update_documents(documents, Some("primary")).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); - let (response, code) = index.get_task(0).await; + let (response, code) = index.get_task(response.uid()).await; assert_eq!(code, 200); assert_eq!(response["status"], "succeeded"); - assert_eq!(response["uid"], 0); assert_eq!(response["type"], "documentAdditionOrUpdate"); assert_eq!(response["details"]["indexedDocuments"], 1); assert_eq!(response["details"]["receivedDocuments"], 1); @@ -52,8 +51,8 @@ async fn document_update_with_primary_key() { #[actix_rt::test] async fn update_document() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = json!([ { @@ -62,10 +61,10 @@ async fn update_document() { } ]); - let (_response, code) = index.add_documents(documents, None).await; + let (response, code) = index.add_documents(documents, None).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -77,9 +76,9 @@ async fn update_document() { let (response, code) = index.update_documents(documents, None).await; assert_eq!(code, 202, "response: {}", response); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); - let (response, code) = index.get_task(1).await; + let (response, code) = index.get_task(response.uid()).await; assert_eq!(code, 200); assert_eq!(response["status"], "succeeded"); @@ -96,8 +95,8 @@ async fn update_document() { #[actix_rt::test] async fn update_document_gzip_encoded() { - let server = Server::new().await; - let index = server.index_with_encoder("test", Encoder::Gzip); + let server = Server::new_shared(); + let index = server.unique_index_with_encoder(Encoder::Gzip); let documents = json!([ { @@ -106,10 +105,10 @@ async fn update_document_gzip_encoded() { } ]); - let (_response, code) = index.add_documents(documents, None).await; + let (response, code) = index.add_documents(documents, None).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -121,9 +120,9 @@ async fn update_document_gzip_encoded() { let (response, code) = index.update_documents(documents, None).await; assert_eq!(code, 202, "response: {}", response); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); - let (response, code) = index.get_task(1).await; + let (response, code) = index.get_task(response.uid()).await; assert_eq!(code, 200); assert_eq!(response["status"], "succeeded"); @@ -140,12 +139,12 @@ async fn update_document_gzip_encoded() { 
#[actix_rt::test] async fn update_larger_dataset() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(); - index.update_documents(documents, None).await; - index.wait_task(0).await; - let (response, code) = index.get_task(0).await; + let (task, _code) = index.update_documents(documents, None).await; + index.wait_task(task.uid()).await.succeeded(); + let (response, code) = index.get_task(task.uid()).await; assert_eq!(code, 200); assert_eq!(response["type"], "documentAdditionOrUpdate"); assert_eq!(response["details"]["indexedDocuments"], 77); @@ -158,8 +157,8 @@ async fn update_larger_dataset() { #[actix_rt::test] async fn error_update_documents_bad_document_id() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); index.create(Some("docid")).await; let documents = json!([ { @@ -167,13 +166,13 @@ async fn error_update_documents_bad_document_id() { "content": "foobar" } ]); - index.update_documents(documents, None).await; - let response = index.wait_task(1).await; + let (task, _code) = index.update_documents(documents, None).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], json!("failed")); assert_eq!( response["error"]["message"], json!( - r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."# + r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."# ) ); assert_eq!(response["error"]["code"], json!("invalid_document_id")); @@ -186,8 +185,8 @@ async fn error_update_documents_bad_document_id() { #[actix_rt::test] async fn error_update_documents_missing_document_id() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); index.create(Some("docid")).await; let documents = json!([ { @@ -195,8 +194,8 @@ async fn error_update_documents_missing_document_id() { "content": "foobar" } ]); - index.update_documents(documents, None).await; - let response = index.wait_task(1).await; + let (task, _code) = index.update_documents(documents, None).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "failed"); assert_eq!( response["error"]["message"], @@ -212,8 +211,8 @@ async fn error_update_documents_missing_document_id() { #[actix_rt::test] async fn update_faceted_document() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let (response, code) = index .update_settings(json!({ @@ -221,7 +220,7 @@ async fn update_faceted_document() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); let documents: Vec<_> = (0..1000) .map(|id| { @@ -232,10 +231,10 @@ async fn update_faceted_document() { }) .collect(); - let (_response, code) = index.add_documents(documents.into(), None).await; + let (response, code) = index.add_documents(documents.into(), None).await; assert_eq!(code, 
202); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); let documents = json!([ { @@ -247,7 +246,7 @@ async fn update_faceted_document() { let (response, code) = index.update_documents(documents, None).await; assert_eq!(code, 202, "response: {}", response); - index.wait_task(2).await; + index.wait_task(response.uid()).await.succeeded(); index .search(json!({"limit": 10}), |response, code| { diff --git a/meilisearch/tests/dumps/data.rs b/crates/meilisearch/tests/dumps/data.rs similarity index 100% rename from meilisearch/tests/dumps/data.rs rename to crates/meilisearch/tests/dumps/data.rs diff --git a/meilisearch/tests/dumps/mod.rs b/crates/meilisearch/tests/dumps/mod.rs similarity index 100% rename from meilisearch/tests/dumps/mod.rs rename to crates/meilisearch/tests/dumps/mod.rs diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/5.snap diff --git 
a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_raw/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_movie_with_settings/7.snap 
diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v1_rubygems_with_settings/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/2.snap similarity index 100% rename from 
meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_raw/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/4.snap rename to 
crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_movie_with_settings/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/6.snap 
b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v2_rubygems_with_settings/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_raw/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/1.snap similarity index 100% rename from 
meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_movie_with_settings/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/3.snap 
b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v3_rubygems_with_settings/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/5.snap 
b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_raw/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/7.snap 
b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_movie_with_settings/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/1.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v4_rubygems_with_settings/7.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/1.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/1.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/1.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/1.snap 
diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/2.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/2.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/2.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/2.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/3.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/3.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/3.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/3.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/4.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/4.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/4.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/4.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/5.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/5.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/5.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/5.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/6.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/6.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/6.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/6.snap diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/7.snap b/crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/7.snap similarity index 100% rename from meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/7.snap rename to crates/meilisearch/tests/dumps/snapshots/mod.rs/import_dump_v5/7.snap diff --git a/meilisearch/tests/features/mod.rs b/crates/meilisearch/tests/features/mod.rs similarity index 100% rename from meilisearch/tests/features/mod.rs rename to crates/meilisearch/tests/features/mod.rs diff --git a/meilisearch/tests/index/create_index.rs b/crates/meilisearch/tests/index/create_index.rs similarity index 74% rename from meilisearch/tests/index/create_index.rs rename to crates/meilisearch/tests/index/create_index.rs index b51ccab51..9b9fbd039 100644 --- a/meilisearch/tests/index/create_index.rs +++ b/crates/meilisearch/tests/index/create_index.rs @@ -9,15 +9,15 @@ use crate::json; #[actix_rt::test] async fn create_index_no_primary_key() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let (response, code) = index.create(None).await; assert_eq!(code, 202); assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(0).await; + let response = index.wait_task(response.uid()).await; assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -26,15 +26,15 @@ async fn create_index_no_primary_key() { #[actix_rt::test] async fn create_index_with_gzip_encoded_request() { - let server = Server::new().await; - let index = server.index_with_encoder("test", Encoder::Gzip); + let server = Server::new_shared(); + let index = server.unique_index_with_encoder(Encoder::Gzip); let (response, code) = index.create(None).await; assert_eq!(code, 202); assert_eq!(response["status"], "enqueued"); - let response = 
index.wait_task(0).await; + let response = index.wait_task(response.uid()).await; assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -43,7 +43,7 @@ async fn create_index_with_gzip_encoded_request() { #[actix_rt::test] async fn create_index_with_gzip_encoded_request_and_receiving_brotli_encoded_response() { - let server = Server::new().await; + let server = Server::new_shared(); let app = server.init_web_app().await; let body = serde_json::to_string(&json!({ @@ -68,21 +68,20 @@ async fn create_index_with_gzip_encoded_request_and_receiving_brotli_encoded_res let parsed_response = serde_json::from_slice::<Value>(decoded.into().as_ref()).expect("Expecting valid json"); - assert_eq!(parsed_response["taskUid"], 0); assert_eq!(parsed_response["indexUid"], "test"); } #[actix_rt::test] async fn create_index_with_zlib_encoded_request() { - let server = Server::new().await; - let index = server.index_with_encoder("test", Encoder::Deflate); + let server = Server::new_shared(); + let index = server.unique_index_with_encoder(Encoder::Deflate); let (response, code) = index.create(None).await; assert_eq!(code, 202); assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(0).await; + let response = index.wait_task(response.uid()).await; assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -91,15 +90,15 @@ async fn create_index_with_zlib_encoded_request() { #[actix_rt::test] async fn create_index_with_brotli_encoded_request() { - let server = Server::new().await; - let index = server.index_with_encoder("test", Encoder::Brotli); + let server = Server::new_shared(); + let index = server.unique_index_with_encoder(Encoder::Brotli); let (response, code) = index.create(None).await; assert_eq!(code, 202); assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(0).await; + let response = index.wait_task(response.uid()).await; assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -108,15 +107,15 @@ async fn create_index_with_brotli_encoded_request() { #[actix_rt::test] async fn create_index_with_primary_key() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let (response, code) = index.create(Some("primary")).await; assert_eq!(code, 202); assert_eq!(response["status"], "enqueued"); - let response = index.wait_task(0).await; + let response = index.wait_task(response.uid()).await; assert_eq!(response["status"], "succeeded"); assert_eq!(response["type"], "indexCreation"); @@ -125,14 +124,25 @@ async fn create_index_with_primary_key() { #[actix_rt::test] async fn create_index_with_invalid_primary_key() { - let document = json!([ { "id": 2, "title": "Pride and Prejudice" } ]); + let documents = json!([ { "id": 2, "title": "Pride and Prejudice" } ]); - let server = Server::new().await; - let index = server.index("movies"); - let (_response, code) = index.add_documents(document, Some("title")).await; + let server = Server::new_shared(); + let index = server.unique_index(); + let (response, code) = index.add_documents(documents, Some("title")).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(response.uid()).await; + + let (response, code) = index.get().await; + assert_eq!(code, 200); + assert_eq!(response["primaryKey"], json!(null)); + + let documents = json!([ { "id": "e".repeat(513) } ]); + + let (response, code) = 
index.add_documents(documents, Some("id")).await; + assert_eq!(code, 202); + + index.wait_task(response.uid()).await; let (response, code) = index.get().await; assert_eq!(code, 200); @@ -141,19 +151,19 @@ async fn create_index_with_invalid_primary_key() { #[actix_rt::test] async fn test_create_multiple_indexes() { - let server = Server::new().await; - let index1 = server.index("test1"); - let index2 = server.index("test2"); - let index3 = server.index("test3"); - let index4 = server.index("test4"); + let server = Server::new_shared(); + let index1 = server.unique_index(); + let index2 = server.unique_index(); + let index3 = server.unique_index(); + let index4 = server.unique_index(); - index1.create(None).await; - index2.create(None).await; - index3.create(None).await; + let (task1, _) = index1.create(None).await; + let (task2, _) = index2.create(None).await; + let (task3, _) = index3.create(None).await; - index1.wait_task(0).await; - index1.wait_task(1).await; - index1.wait_task(2).await; + index1.wait_task(task1.uid()).await.succeeded(); + index2.wait_task(task2.uid()).await.succeeded(); + index3.wait_task(task3.uid()).await.succeeded(); assert_eq!(index1.get().await.1, 200); assert_eq!(index2.get().await.1, 200); @@ -163,18 +173,22 @@ async fn test_create_multiple_indexes() { #[actix_rt::test] async fn error_create_existing_index() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let (_, code) = index.create(Some("primary")).await; assert_eq!(code, 202); - index.create(Some("primary")).await; + let (task, _) = index.create(Some("primary")).await; - let response = index.wait_task(1).await; + let response = index.wait_task(task.uid()).await; + let msg = format!( + "Index `{}` already exists.", + task["indexUid"].as_str().expect("indexUid should exist").trim_matches('"') + ); let expected_response = json!({ - "message": "Index `test` already exists.", + "message": msg, "code": "index_already_exists", "type": "invalid_request", "link":"https://docs.meilisearch.com/errors#index_already_exists" @@ -192,7 +206,7 @@ async fn error_create_with_invalid_index_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.uid`: `test test#!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value at `.uid`: `test test#!` is not a valid index uid. 
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/index/delete_index.rs b/crates/meilisearch/tests/index/delete_index.rs similarity index 66% rename from meilisearch/tests/index/delete_index.rs rename to crates/meilisearch/tests/index/delete_index.rs index e404a6003..03185d21a 100644 --- a/meilisearch/tests/index/delete_index.rs +++ b/crates/meilisearch/tests/index/delete_index.rs @@ -1,51 +1,50 @@ -use crate::common::Server; +use crate::common::{shared_does_not_exists_index, Server}; use crate::json; #[actix_rt::test] async fn create_and_delete_index() { - let server = Server::new().await; - let index = server.index("test"); - let (_response, code) = index.create(None).await; + let server = Server::new_shared(); + let index = server.unique_index(); + let (response, code) = index.create(None).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); assert_eq!(index.get().await.1, 200); - let (_response, code) = index.delete().await; + let (response, code) = index.delete().await; assert_eq!(code, 202); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); assert_eq!(index.get().await.1, 404); } #[actix_rt::test] async fn error_delete_unexisting_index() { - let server = Server::new().await; - let index = server.index("test"); - let (_, code) = index.delete().await; + let index = shared_does_not_exists_index().await; + let (task, code) = index.delete_index_fail().await; assert_eq!(code, 202); let expected_response = json!({ - "message": "Index `test` not found.", + "message": "Index `DOES_NOT_EXISTS` not found.", "code": "index_not_found", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#index_not_found" }); - let response = index.wait_task(0).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "failed"); assert_eq!(response["error"], expected_response); } #[actix_rt::test] async fn loop_delete_add_documents() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = json!([{"id": 1, "field1": "hello"}]); let mut tasks = Vec::new(); for _ in 0..50 { diff --git a/meilisearch/tests/index/errors.rs b/crates/meilisearch/tests/index/errors.rs similarity index 96% rename from meilisearch/tests/index/errors.rs rename to crates/meilisearch/tests/index/errors.rs index 9c677ee12..3bab83955 100644 --- a/meilisearch/tests/index/errors.rs +++ b/crates/meilisearch/tests/index/errors.rs @@ -75,7 +75,7 @@ async fn create_index_bad_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.uid`: `the best doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value at `.uid`: `the best doggo` is not a valid index uid. 
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -136,7 +136,7 @@ async fn get_index_bad_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -232,7 +232,7 @@ async fn update_index_bad_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -247,7 +247,7 @@ async fn delete_index_bad_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/index/get_index.rs b/crates/meilisearch/tests/index/get_index.rs similarity index 98% rename from meilisearch/tests/index/get_index.rs rename to crates/meilisearch/tests/index/get_index.rs index 5a184c8ce..ce08251be 100644 --- a/meilisearch/tests/index/get_index.rs +++ b/crates/meilisearch/tests/index/get_index.rs @@ -186,7 +186,7 @@ async fn get_invalid_index_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "`this is not a valid index name` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`this is not a valid index name` is not a valid index uid. 
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/index/mod.rs b/crates/meilisearch/tests/index/mod.rs similarity index 100% rename from meilisearch/tests/index/mod.rs rename to crates/meilisearch/tests/index/mod.rs diff --git a/meilisearch/tests/index/stats.rs b/crates/meilisearch/tests/index/stats.rs similarity index 100% rename from meilisearch/tests/index/stats.rs rename to crates/meilisearch/tests/index/stats.rs diff --git a/meilisearch/tests/index/update_index.rs b/crates/meilisearch/tests/index/update_index.rs similarity index 100% rename from meilisearch/tests/index/update_index.rs rename to crates/meilisearch/tests/index/update_index.rs diff --git a/meilisearch/tests/integration.rs b/crates/meilisearch/tests/integration.rs similarity index 100% rename from meilisearch/tests/integration.rs rename to crates/meilisearch/tests/integration.rs diff --git a/meilisearch/tests/logs/error.rs b/crates/meilisearch/tests/logs/error.rs similarity index 100% rename from meilisearch/tests/logs/error.rs rename to crates/meilisearch/tests/logs/error.rs diff --git a/meilisearch/tests/logs/mod.rs b/crates/meilisearch/tests/logs/mod.rs similarity index 96% rename from meilisearch/tests/logs/mod.rs rename to crates/meilisearch/tests/logs/mod.rs index 9f4649dca..26482b561 100644 --- a/meilisearch/tests/logs/mod.rs +++ b/crates/meilisearch/tests/logs/mod.rs @@ -7,8 +7,9 @@ use std::str::FromStr; use actix_web::http::header::ContentType; use actix_web::web::Data; use meili_snap::snapshot; +use meilisearch::analytics::Analytics; use meilisearch::search_queue::SearchQueue; -use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; +use meilisearch::{create_app, Opt, SubscriberForSecondLayer}; use tracing::level_filters::LevelFilter; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::Layer; @@ -54,7 +55,7 @@ async fn basic_test_log_stream_route() { Data::new(search_queue), server.service.options.clone(), (route_layer_handle, stderr_layer_handle), - analytics::MockAnalytics::new(&server.service.options), + Data::new(Analytics::no_analytics()), true, )) .await; diff --git a/meilisearch/tests/search/distinct.rs b/crates/meilisearch/tests/search/distinct.rs similarity index 100% rename from meilisearch/tests/search/distinct.rs rename to crates/meilisearch/tests/search/distinct.rs diff --git a/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs similarity index 100% rename from meilisearch/tests/search/errors.rs rename to crates/meilisearch/tests/search/errors.rs diff --git a/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs similarity index 100% rename from meilisearch/tests/search/facet_search.rs rename to crates/meilisearch/tests/search/facet_search.rs diff --git a/meilisearch/tests/search/formatted.rs b/crates/meilisearch/tests/search/formatted.rs similarity index 100% rename from meilisearch/tests/search/formatted.rs rename to crates/meilisearch/tests/search/formatted.rs diff --git a/meilisearch/tests/search/geo.rs b/crates/meilisearch/tests/search/geo.rs similarity index 100% rename from meilisearch/tests/search/geo.rs rename to crates/meilisearch/tests/search/geo.rs diff --git a/meilisearch/tests/search/hybrid.rs b/crates/meilisearch/tests/search/hybrid.rs 
similarity index 95% rename from meilisearch/tests/search/hybrid.rs rename to crates/meilisearch/tests/search/hybrid.rs index e301c0b05..00a65d9aa 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/crates/meilisearch/tests/search/hybrid.rs @@ -568,6 +568,57 @@ async fn retrieve_vectors() { ] "###); + // use explicit `_vectors` in displayed attributes + let (response, code) = index + .update_settings(json!({ "displayedAttributes": ["id", "title", "desc", "_vectors"]} )) + .await; + assert_eq!(202, code, "{:?}", response); + index.wait_task(response.uid()).await; + + let (response, code) = index + .search_post( + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), + ) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###" + [ + { + "title": "Captain Planet", + "desc": "He's not part of the Marvel Cinematic Universe", + "id": "2", + "_vectors": { + "default": { + "embeddings": "[vectors]", + "regenerate": true + } + } + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "3", + "_vectors": { + "default": { + "embeddings": "[vectors]", + "regenerate": true + } + } + }, + { + "title": "Shazam!", + "desc": "a Captain Marvel ersatz", + "id": "1", + "_vectors": { + "default": { + "embeddings": "[vectors]", + "regenerate": true + } + } + } + ] + "###); + // remove `_vectors` from displayed attributes let (response, code) = index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await; diff --git a/meilisearch/tests/search/locales.rs b/crates/meilisearch/tests/search/locales.rs similarity index 100% rename from meilisearch/tests/search/locales.rs rename to crates/meilisearch/tests/search/locales.rs diff --git a/meilisearch/tests/search/matching_strategy.rs b/crates/meilisearch/tests/search/matching_strategy.rs similarity index 100% rename from meilisearch/tests/search/matching_strategy.rs rename to crates/meilisearch/tests/search/matching_strategy.rs diff --git a/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs similarity index 100% rename from meilisearch/tests/search/mod.rs rename to crates/meilisearch/tests/search/mod.rs diff --git a/meilisearch/tests/search/multi.rs b/crates/meilisearch/tests/search/multi.rs similarity index 99% rename from meilisearch/tests/search/multi.rs rename to crates/meilisearch/tests/search/multi.rs index b9593f05f..eaa1da15f 100644 --- a/meilisearch/tests/search/multi.rs +++ b/crates/meilisearch/tests/search/multi.rs @@ -412,7 +412,7 @@ async fn simple_search_illegal_index_uid() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, @r###" { - "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -437,7 +437,7 @@ async fn federation_search_illegal_index_uid() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, @r###" { - "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. 
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/search/pagination.rs b/crates/meilisearch/tests/search/pagination.rs similarity index 100% rename from meilisearch/tests/search/pagination.rs rename to crates/meilisearch/tests/search/pagination.rs diff --git a/meilisearch/tests/search/restrict_searchable.rs b/crates/meilisearch/tests/search/restrict_searchable.rs similarity index 100% rename from meilisearch/tests/search/restrict_searchable.rs rename to crates/meilisearch/tests/search/restrict_searchable.rs diff --git a/meilisearch/tests/search/search_queue.rs b/crates/meilisearch/tests/search/search_queue.rs similarity index 100% rename from meilisearch/tests/search/search_queue.rs rename to crates/meilisearch/tests/search/search_queue.rs diff --git a/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap b/crates/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap similarity index 100% rename from meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap rename to crates/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap diff --git a/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap b/crates/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap similarity index 100% rename from meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap rename to crates/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap diff --git a/meilisearch/tests/settings/distinct.rs b/crates/meilisearch/tests/settings/distinct.rs similarity index 100% rename from meilisearch/tests/settings/distinct.rs rename to crates/meilisearch/tests/settings/distinct.rs diff --git a/meilisearch/tests/settings/errors.rs b/crates/meilisearch/tests/settings/errors.rs similarity index 100% rename from meilisearch/tests/settings/errors.rs rename to crates/meilisearch/tests/settings/errors.rs diff --git a/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs similarity index 99% rename from meilisearch/tests/settings/get_settings.rs rename to crates/meilisearch/tests/settings/get_settings.rs index e99a9fa65..6de0db0b3 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -330,7 +330,7 @@ async fn error_update_setting_unexisting_index_invalid_uid() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "`test##! ` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`test##! ` is not a valid index uid. 
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/settings/mod.rs b/crates/meilisearch/tests/settings/mod.rs similarity index 100% rename from meilisearch/tests/settings/mod.rs rename to crates/meilisearch/tests/settings/mod.rs diff --git a/meilisearch/tests/settings/proximity_settings.rs b/crates/meilisearch/tests/settings/proximity_settings.rs similarity index 100% rename from meilisearch/tests/settings/proximity_settings.rs rename to crates/meilisearch/tests/settings/proximity_settings.rs diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/crates/meilisearch/tests/settings/tokenizer_customization.rs similarity index 100% rename from meilisearch/tests/settings/tokenizer_customization.rs rename to crates/meilisearch/tests/settings/tokenizer_customization.rs diff --git a/meilisearch/tests/similar/errors.rs b/crates/meilisearch/tests/similar/errors.rs similarity index 99% rename from meilisearch/tests/similar/errors.rs rename to crates/meilisearch/tests/similar/errors.rs index 228358d54..1e933e1c0 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/crates/meilisearch/tests/similar/errors.rs @@ -79,7 +79,7 @@ async fn similar_bad_id() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", + "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_similar_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_id" @@ -172,7 +172,7 @@ async fn similar_invalid_id() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", + "message": "Invalid value at `.id`: the value of `id` is invalid. 
A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_similar_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_id" diff --git a/meilisearch/tests/similar/mod.rs b/crates/meilisearch/tests/similar/mod.rs similarity index 100% rename from meilisearch/tests/similar/mod.rs rename to crates/meilisearch/tests/similar/mod.rs diff --git a/meilisearch/tests/snapshot/mod.rs b/crates/meilisearch/tests/snapshot/mod.rs similarity index 100% rename from meilisearch/tests/snapshot/mod.rs rename to crates/meilisearch/tests/snapshot/mod.rs diff --git a/meilisearch/tests/stats/mod.rs b/crates/meilisearch/tests/stats/mod.rs similarity index 100% rename from meilisearch/tests/stats/mod.rs rename to crates/meilisearch/tests/stats/mod.rs diff --git a/meilisearch/tests/swap_indexes/errors.rs b/crates/meilisearch/tests/swap_indexes/errors.rs similarity index 100% rename from meilisearch/tests/swap_indexes/errors.rs rename to crates/meilisearch/tests/swap_indexes/errors.rs diff --git a/meilisearch/tests/swap_indexes/mod.rs b/crates/meilisearch/tests/swap_indexes/mod.rs similarity index 100% rename from meilisearch/tests/swap_indexes/mod.rs rename to crates/meilisearch/tests/swap_indexes/mod.rs diff --git a/meilisearch/tests/tasks/errors.rs b/crates/meilisearch/tests/tasks/errors.rs similarity index 99% rename from meilisearch/tests/tasks/errors.rs rename to crates/meilisearch/tests/tasks/errors.rs index c404a2329..42ec42997 100644 --- a/meilisearch/tests/tasks/errors.rs +++ b/crates/meilisearch/tests/tasks/errors.rs @@ -173,7 +173,7 @@ async fn task_bad_index_uids() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -184,7 +184,7 @@ async fn task_bad_index_uids() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -195,7 +195,7 @@ async fn task_bad_index_uids() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. 
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/tasks/mod.rs b/crates/meilisearch/tests/tasks/mod.rs similarity index 100% rename from meilisearch/tests/tasks/mod.rs rename to crates/meilisearch/tests/tasks/mod.rs diff --git a/meilisearch/tests/tasks/webhook.rs b/crates/meilisearch/tests/tasks/webhook.rs similarity index 100% rename from meilisearch/tests/tasks/webhook.rs rename to crates/meilisearch/tests/tasks/webhook.rs diff --git a/meilisearch/tests/vector/binary_quantized.rs b/crates/meilisearch/tests/vector/binary_quantized.rs similarity index 100% rename from meilisearch/tests/vector/binary_quantized.rs rename to crates/meilisearch/tests/vector/binary_quantized.rs diff --git a/meilisearch/tests/vector/intel_gen.txt.gz b/crates/meilisearch/tests/vector/intel_gen.txt.gz similarity index 100% rename from meilisearch/tests/vector/intel_gen.txt.gz rename to crates/meilisearch/tests/vector/intel_gen.txt.gz diff --git a/meilisearch/tests/vector/mod.rs b/crates/meilisearch/tests/vector/mod.rs similarity index 100% rename from meilisearch/tests/vector/mod.rs rename to crates/meilisearch/tests/vector/mod.rs diff --git a/meilisearch/tests/vector/openai.rs b/crates/meilisearch/tests/vector/openai.rs similarity index 100% rename from meilisearch/tests/vector/openai.rs rename to crates/meilisearch/tests/vector/openai.rs diff --git a/meilisearch/tests/vector/openai_responses.json.gz b/crates/meilisearch/tests/vector/openai_responses.json.gz similarity index 100% rename from meilisearch/tests/vector/openai_responses.json.gz rename to crates/meilisearch/tests/vector/openai_responses.json.gz diff --git a/meilisearch/tests/vector/openai_tokenized_responses.json.gz b/crates/meilisearch/tests/vector/openai_tokenized_responses.json.gz similarity index 100% rename from meilisearch/tests/vector/openai_tokenized_responses.json.gz rename to crates/meilisearch/tests/vector/openai_tokenized_responses.json.gz diff --git a/meilisearch/tests/vector/rest.rs b/crates/meilisearch/tests/vector/rest.rs similarity index 100% rename from meilisearch/tests/vector/rest.rs rename to crates/meilisearch/tests/vector/rest.rs diff --git a/meilisearch/tests/vector/settings.rs b/crates/meilisearch/tests/vector/settings.rs similarity index 74% rename from meilisearch/tests/vector/settings.rs rename to crates/meilisearch/tests/vector/settings.rs index 4f07ca18b..ed45913a8 100644 --- a/meilisearch/tests/vector/settings.rs +++ b/crates/meilisearch/tests/vector/settings.rs @@ -4,6 +4,53 @@ use crate::common::{GetAllDocumentsOptions, Server}; use crate::json; use crate::vector::generate_default_user_provided_documents; +#[actix_rt::test] +async fn field_unavailable_for_source() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + 
.update_settings(json!({ + "embedders": { "manual": {"source": "userProvided", "documentTemplate": "{{doc.documentTemplate}}"}}, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.manual`: Field `documentTemplate` unavailable for source `userProvided` (only available for sources: `huggingFace`, `openAi`, `ollama`, `rest`). Available fields: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { "default": {"source": "openAi", "revision": "42"}}, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `dimensions`, `distribution`, `url`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); +} + #[actix_rt::test] async fn update_embedder() { let server = Server::new().await; diff --git a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap b/crates/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap similarity index 100% rename from meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap rename to crates/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap diff --git a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap b/crates/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap similarity index 100% rename from meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap rename to crates/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap diff --git a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap b/crates/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap similarity index 100% rename from meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap rename to crates/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap diff --git a/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml similarity index 75% rename from meilitool/Cargo.toml rename to crates/meilitool/Cargo.toml index ce6c1ad5b..048da6232 100644 --- a/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -16,5 +16,6 @@ file-store = { path = "../file-store" } meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } serde = { version = "1.0.209", features = ["derive"] } -time = { version = "0.3.36", features = ["formatting"] } +time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } +arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" } diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs new file mode 100644 
index 000000000..978824356 --- /dev/null +++ b/crates/meilitool/src/main.rs @@ -0,0 +1,334 @@ +use std::fs::{read_dir, read_to_string, remove_file, File}; +use std::io::BufWriter; +use std::path::PathBuf; + +use anyhow::Context; +use clap::{Parser, Subcommand}; +use dump::{DumpWriter, IndexMetadata}; +use file_store::FileStore; +use meilisearch_auth::AuthController; +use meilisearch_types::heed::types::{SerdeJson, Str}; +use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; +use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; +use meilisearch_types::milli::{obkv_to_json, BEU32}; +use meilisearch_types::tasks::{Status, Task}; +use meilisearch_types::versioning::{get_version, parse_version}; +use meilisearch_types::Index; +use time::macros::format_description; +use time::OffsetDateTime; +use upgrade::OfflineUpgrade; +use uuid_codec::UuidCodec; + +mod upgrade; +mod uuid_codec; + +#[derive(Parser)] +#[command(author, version, about, long_about = None)] +struct Cli { + /// The database path where Meilisearch is running. + #[arg(long, default_value = "data.ms/")] + db_path: PathBuf, + + #[command(subcommand)] + command: Command, +} + +#[derive(Subcommand)] +enum Command { + /// Clears the task queue and makes it empty. + /// + /// This command can be safely executed even if Meilisearch is running and processing tasks. + /// Once the task queue is empty you can restart Meilisearch and no more tasks should be visible, + /// even the ones that were processing. However, it's highly possible that you will see the processing + /// tasks in the queue again with an associated internal error message. + ClearTaskQueue, + + /// Exports a dump from the Meilisearch database. + /// + /// Make sure to run this command when Meilisearch is not running or running but not processing tasks. + /// If tasks are being processed while a dump is being exported there is a chance for the dump to be + /// malformed, with missing tasks. + /// + /// TODO Verify this claim or make sure it cannot happen and we can export dumps + /// without caring about killing Meilisearch first! + ExportADump { + /// The directory in which the dump will be created. + #[arg(long, default_value = "dumps/")] + dump_dir: PathBuf, + + /// Skip dumping the enqueued or processing tasks. + /// + /// Can be useful when there are a lot of them and it is not particularly useful + /// to keep them. Note that only the enqueued tasks take up space, so skipping + /// the processed ones is not particularly interesting. + #[arg(long)] + skip_enqueued_tasks: bool, + }, + + /// Attempts to upgrade from one major version to the next without a dump. + /// + /// Make sure to run this command when Meilisearch is not running! + /// If Meilisearch is running while executing this command, the database could be corrupted + /// (contain data from both the old and the new versions). + /// + /// Supported upgrade paths: + /// + /// - v1.9.x -> v1.10.x -> v1.11.x + OfflineUpgrade { + #[arg(long)] + target_version: String, + }, +}
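As a quick illustration of how the clap definitions above are driven (a hedged sketch, not part of the patch; the function name and argument values are hypothetical, and `try_parse_from` is clap's standard in-memory parsing entry point):

use clap::Parser;

fn parse_example() -> anyhow::Result<()> {
    // Derived subcommands use kebab-cased variant names: `OfflineUpgrade` -> `offline-upgrade`.
    let cli = Cli::try_parse_from([
        "meilitool", "--db-path", "data.ms/", "offline-upgrade", "--target-version", "1.11.0",
    ])?;
    match cli.command {
        Command::OfflineUpgrade { target_version } => assert_eq!(target_version, "1.11.0"),
        _ => unreachable!(),
    }
    Ok(())
}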
+ +fn main() -> anyhow::Result<()> { + let Cli { db_path, command } = Cli::parse(); + + let detected_version = get_version(&db_path).context("While checking the version file")?; + + match command { + Command::ClearTaskQueue => clear_task_queue(db_path), + Command::ExportADump { dump_dir, skip_enqueued_tasks } => { + export_a_dump(db_path, dump_dir, skip_enqueued_tasks) + } + Command::OfflineUpgrade { target_version } => { + let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; + OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() + } + } +} + +/// Clears the task queue located at `db_path`. +fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { + let path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) } + .with_context(|| format!("While trying to open {:?}", path.display()))?; + + eprintln!("Deleting tasks from the database..."); + + let mut wtxn = env.write_txn()?; + let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?; + let total = all_tasks.len(&wtxn)?; + let status = try_opening_poly_database(&env, &wtxn, "status")?; + let kind = try_opening_poly_database(&env, &wtxn, "kind")?; + let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?; + let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?; + let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?; + let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?; + let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?; + + try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?; + try_clearing_poly_database(&mut wtxn, status, "status")?; + try_clearing_poly_database(&mut wtxn, kind, "kind")?; + try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?; + try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?; + try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?; + try_clearing_poly_database(&mut wtxn, started_at, "started-at")?; + try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?; + + wtxn.commit().context("While committing the transaction")?; + + eprintln!("Successfully deleted {total} tasks from the tasks database!"); + eprintln!("Deleting the content files from disk..."); + + let mut count = 0usize; + let update_files = db_path.join("update_files"); + let entries = read_dir(&update_files).with_context(|| { + format!("While trying to read the content of {:?}", update_files.display()) + })?; + for result in entries { + match result { + Ok(ent) => match remove_file(ent.path()) { + Ok(_) => count += 1, + Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e), + }, + Err(e) => { + eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e) + } + } + } + + eprintln!("Successfully deleted {count} content files from disk!"); + + Ok(()) +} + +fn try_opening_database<KC: 'static, DC: 'static>( + env: &Env, + rtxn: &RoTxn, + db_name: &str, +) -> anyhow::Result<Database<KC, DC>> { + env.open_database(rtxn, Some(db_name)) + .with_context(|| format!("While opening the {db_name:?} database"))?
+ .with_context(|| format!("Missing the {db_name:?} database")) +} + +fn try_opening_poly_database( + env: &Env, + rtxn: &RoTxn, + db_name: &str, +) -> anyhow::Result> { + env.database_options() + .name(db_name) + .open(rtxn) + .with_context(|| format!("While opening the {db_name:?} poly database"))? + .with_context(|| format!("Missing the {db_name:?} poly database")) +} + +fn try_clearing_poly_database( + wtxn: &mut RwTxn, + database: Database, + db_name: &str, +) -> anyhow::Result<()> { + database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database")) +} + +/// Exports a dump into the dump directory. +fn export_a_dump( + db_path: PathBuf, + dump_dir: PathBuf, + skip_enqueued_tasks: bool, +) -> Result<(), anyhow::Error> { + let started_at = OffsetDateTime::now_utc(); + + // 1. Extracts the instance UID from disk + let instance_uid_path = db_path.join("instance-uid"); + let instance_uid = match read_to_string(&instance_uid_path) { + Ok(content) => match content.trim().parse() { + Ok(uuid) => Some(uuid), + Err(e) => { + eprintln!("Impossible to parse instance-uid: {e}"); + None + } + }, + Err(e) => { + eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e); + None + } + }; + + let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?; + let file_store = + FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; + + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + eprintln!("Dumping the keys..."); + + // 2. dump the keys + let auth_store = AuthController::new(&db_path, &None) + .with_context(|| format!("While opening the auth store at {}", db_path.display()))?; + let mut dump_keys = dump.create_keys()?; + let mut count = 0; + for key in auth_store.list_keys()? { + dump_keys.push_key(&key)?; + count += 1; + } + dump_keys.flush()?; + + eprintln!("Successfully dumped {count} keys!"); + + let rtxn = env.read_txn()?; + let all_tasks: Database> = + try_opening_database(&env, &rtxn, "all-tasks")?; + let index_mapping: Database = + try_opening_database(&env, &rtxn, "index-mapping")?; + + if skip_enqueued_tasks { + eprintln!("Skip dumping the enqueued tasks..."); + } else { + eprintln!("Dumping the enqueued tasks..."); + + // 3. dump the tasks + let mut dump_tasks = dump.create_tasks_queue()?; + let mut count = 0; + for ret in all_tasks.iter(&rtxn)? { + let (_, t) = ret?; + let status = t.status; + let content_file = t.content_uuid(); + let mut dump_content_file = dump_tasks.push_task(&t.into())?; + + // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. + if let Some(content_file_uuid) = content_file { + if status == Status::Enqueued { + let content_file = file_store.get_update(content_file_uuid)?; + + let reader = + DocumentsBatchReader::from_reader(content_file).with_context(|| { + format!("While reading content file {:?}", content_file_uuid) + })?; + + let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); + while let Some(doc) = cursor.next_document().with_context(|| { + format!("While iterating on content file {:?}", content_file_uuid) + })? 
{ + dump_content_file + .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?; + } + dump_content_file.flush()?; + count += 1; + } + } + } + dump_tasks.flush()?; + + eprintln!("Successfully dumped {count} enqueued tasks!"); + } + + eprintln!("Dumping the indexes..."); + + // 4. Dump the indexes + let mut count = 0; + for result in index_mapping.iter(&rtxn)? { + let (uid, uuid) = result?; + let index_path = db_path.join("indexes").join(uuid.to_string()); + let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| { + format!("While trying to open the index at path {:?}", index_path.display()) + })?; + + let rtxn = index.read_txn()?; + let metadata = IndexMetadata { + uid: uid.to_owned(), + primary_key: index.primary_key(&rtxn)?.map(String::from), + created_at: index.created_at(&rtxn)?, + updated_at: index.updated_at(&rtxn)?, + }; + let mut index_dumper = dump.create_index(uid, &metadata)?; + + let fields_ids_map = index.fields_ids_map(&rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + + // 4.1. Dump the documents + for ret in index.all_documents(&rtxn)? { + let (_id, doc) = ret?; + let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?; + index_dumper.push_document(&document)?; + } + + // 4.2. Dump the settings + let settings = meilisearch_types::settings::settings( + &index, + &rtxn, + meilisearch_types::settings::SecretPolicy::RevealSecrets, + )?; + index_dumper.settings(&settings)?; + count += 1; + } + + eprintln!("Successfully dumped {count} indexes!"); + // We will not dump experimental feature settings + eprintln!("The tool is not dumping experimental features, please set them by hand afterward"); + + let dump_uid = started_at.format(format_description!( + "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" + )).unwrap(); + + let path = dump_dir.join(format!("{}.dump", dump_uid)); + let file = File::create(&path)?; + dump.persist_to(BufWriter::new(file))?; + + eprintln!("Dump exported at path {:?}", path.display()); + + Ok(()) +} diff --git a/crates/meilitool/src/upgrade/mod.rs b/crates/meilitool/src/upgrade/mod.rs new file mode 100644 index 000000000..36630c3b3 --- /dev/null +++ b/crates/meilitool/src/upgrade/mod.rs @@ -0,0 +1,73 @@ +mod v1_10; +mod v1_11; +mod v1_9; + +use std::path::{Path, PathBuf}; + +use anyhow::{bail, Context}; +use meilisearch_types::versioning::create_version_file; + +use v1_10::v1_9_to_v1_10; + +use crate::upgrade::v1_11::v1_10_to_v1_11; + +pub struct OfflineUpgrade { + pub db_path: PathBuf, + pub current_version: (String, String, String), + pub target_version: (String, String, String), +} + +impl OfflineUpgrade { + pub fn upgrade(self) -> anyhow::Result<()> { + let upgrade_list = [ + (v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"), + (v1_10_to_v1_11, "1", "11", "0"), + ]; + + let (current_major, current_minor, current_patch) = &self.current_version; + + let start_at = match ( + current_major.as_str(), + current_minor.as_str(), + current_patch.as_str(), + ) { + ("1", "9", _) => 0, + ("1", "10", _) => 1, + _ => { + bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. 
Can only upgrade from v1.9 and v1.10") + } + }; + + let (target_major, target_minor, target_patch) = &self.target_version; + + let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { + ("1", "10", _) => 0, + ("1", "11", _) => 1, + (major, _, _) if major.starts_with('v') => { + bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.") + } + _ => { + bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10 and v1.11") + } + }; + + println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); + + #[allow(clippy::needless_range_loop)] + for index in start_at..=ends_at { + let (func, major, minor, patch) = upgrade_list[index]; + (func)(&self.db_path)?; + println!("Done"); + // We're writing the version file just in case an issue arise _while_ upgrading. + // We don't want the DB to fail in an unknown state. + println!("Writing VERSION file"); + + create_version_file(&self.db_path, major, minor, patch) + .context("while writing VERSION file after the upgrade")?; + } + + println!("Success"); + + Ok(()) + } +} diff --git a/crates/meilitool/src/upgrade/v1_10.rs b/crates/meilitool/src/upgrade/v1_10.rs new file mode 100644 index 000000000..3dd7c72a2 --- /dev/null +++ b/crates/meilitool/src/upgrade/v1_10.rs @@ -0,0 +1,289 @@ +use anyhow::bail; +use std::path::Path; + +use anyhow::Context; +use meilisearch_types::{ + heed::{ + types::{SerdeJson, Str}, + Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, + }, + milli::index::{db_name, main_key}, +}; + +use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; + +use super::v1_9; + +pub type FieldDistribution = std::collections::BTreeMap; + +/// The statistics that can be computed from an `Index` object. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. + #[serde(with = "time::serde::rfc3339")] + pub created_at: time::OffsetDateTime, + /// Date of the last update of the index. 
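A minimal sketch of the version dispatch above (a standalone rewrite for illustration only; the helper name `steps_to_run` is hypothetical):

// Maps the (current, target) minor versions onto the slice of `upgrade_list`
// to execute. A v1.9.x database targeting 1.11.0 yields 0..=1, i.e. it runs
// v1_9_to_v1_10 and then v1_10_to_v1_11, with the VERSION file rewritten
// after each step so an interrupted upgrade never leaves the DB unidentified.
fn steps_to_run(current_minor: &str, target_minor: &str) -> Option<std::ops::RangeInclusive<usize>> {
    let start_at = match current_minor {
        "9" => Some(0),
        "10" => Some(1),
        _ => None,
    }?;
    let ends_at = match target_minor {
        "10" => Some(0),
        "11" => Some(1),
        _ => None,
    }?;
    Some(start_at..=ends_at)
}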
+ #[serde(with = "time::serde::rfc3339")] + pub updated_at: time::OffsetDateTime, +} + +impl From for IndexStats { + fn from( + v1_9::IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at, + updated_at, + }: v1_9::IndexStats, + ) -> Self { + IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at: created_at.0, + updated_at: updated_at.0, + } + } +} + +#[derive(serde::Serialize, serde::Deserialize)] +#[serde(transparent)] +pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime); + +fn update_index_stats( + index_stats: Database, + index_uid: &str, + index_uuid: uuid::Uuid, + sched_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let ctx = || format!("while updating index stats for index `{index_uid}`"); + + let stats: Option<&str> = index_stats + .remap_data_type::() + .get(sched_wtxn, &index_uuid) + .with_context(ctx) + .with_context(|| "While reading value")?; + dbg!(stats); + + let stats: Option = index_stats + .remap_data_type::>() + .get(sched_wtxn, &index_uuid) + .with_context(ctx) + .with_context(|| "While reading value")?; + + if let Some(stats) = stats { + let stats: self::IndexStats = stats.into(); + + index_stats + .remap_data_type::>() + .put(sched_wtxn, &index_uuid, &stats) + .with_context(ctx) + .with_context(|| "While writing value")?; + } + + Ok(()) +} + +fn update_date_format( + index_uid: &str, + index_env: &Env, + index_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) + .with_context(|| format!("while updating date format for index `{index_uid}`"))?; + + date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?; + date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?; + + Ok(()) +} + +fn find_rest_embedders( + index_uid: &str, + index_env: &Env, + index_txn: &RoTxn, +) -> anyhow::Result> { + let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) + .with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?; + + let mut rest_embedders = vec![]; + + for config in main + .remap_types::>>() + .get(index_txn, main_key::EMBEDDING_CONFIGS)? + .unwrap_or_default() + { + if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options { + rest_embedders.push(config.name); + } + } + + Ok(rest_embedders) +} + +fn date_round_trip( + wtxn: &mut RwTxn, + index_uid: &str, + db: Database, + key: &str, +) -> anyhow::Result<()> { + let datetime = + db.remap_types::>().get(wtxn, key).with_context( + || format!("could not read `{key}` while updating date format for index `{index_uid}`"), + )?; + + if let Some(datetime) = datetime { + db.remap_types::>() + .put(wtxn, key, &self::OffsetDateTime(datetime.0)) + .with_context(|| { + format!( + "could not write `{key}` while updating date format for index `{index_uid}`" + ) + })?; + } + + Ok(()) +} + +pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> { + println!("Upgrading from v1.9.0 to v1.10.0"); + // 2 changes here + + // 1. date format. needs to be done before opening the Index + // 2. REST embedders. 
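To make the date-format change concrete, here is a small sketch of the conversion `date_round_trip` performs (illustration only; the sample value and the `demo` function are hypothetical):

use time::format_description::well_known::Rfc3339;
use time::macros::format_description;
use time::OffsetDateTime;

fn demo() -> anyhow::Result<()> {
    // v1.9 stored dates in this textual layout; v1.10 expects RFC 3339.
    let legacy = "2024-11-04 13:32:08.48368 +00:00:00";
    let format = format_description!("[year]-[month]-[day] [hour]:[minute]:[second].[subsecond] [offset_hour sign:mandatory]:[offset_minute]:[offset_second]");
    let parsed = OffsetDateTime::parse(legacy, format)?;
    // Re-serializing through time::serde::rfc3339 yields roughly "2024-11-04T13:32:08.48368Z".
    println!("{}", parsed.format(&Rfc3339)?);
    Ok(())
}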
+ +pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> { + println!("Upgrading from v1.9.0 to v1.10.0"); + // 2 changes here + + // 1. date format. needs to be done before opening the Index + // 2. REST embedders. We don't support this case right now, so bail + + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + let mut sched_wtxn = env.write_txn()?; + + let index_mapping: Database<Str, UuidCodec> = + try_opening_database(&env, &sched_wtxn, "index-mapping")?; + + let index_stats: Database<UuidCodec, Unspecified> = + try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| { + format!("While trying to open {:?}", index_scheduler_path.display()) + })?; + + let index_count = + index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?; + + // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn + // 1. immutably for the iteration + // 2. mutably for updating index stats + let indexes: Vec<_> = index_mapping + .iter(&sched_wtxn)? + .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) + .collect(); + + let mut rest_embedders = Vec::new(); + + let mut unwrapped_indexes = Vec::new(); + + // check that update can take place + for (index_index, result) in indexes.into_iter().enumerate() { + let (uid, uuid) = result?; + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? + }; + + let index_txn = index_env.read_txn().with_context(|| { + format!( + "while obtaining a read transaction for index {uid} at {}", + index_path.display() + ) + })?; + + println!("\t- Checking for incompatible embedders (REST embedders)"); + let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?; + + if rest_embedders_for_index.is_empty() { + unwrapped_indexes.push((uid, uuid)); + } else { + // no need to add to unwrapped indexes because we'll exit early + rest_embedders.push((uid, rest_embedders_for_index)); + } + } + + if !rest_embedders.is_empty() { + let rest_embedders = rest_embedders + .into_iter() + .flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders)) + .map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`")) + .collect::<Vec<_>>() + .join("\n"); + bail!("The update cannot take place because there are REST embedder(s). Remove them before proceeding with the update:\n{rest_embedders}\n\n\ + The database has not been modified and is still a valid v1.9 database."); + } + + println!("Update can take place, updating"); + + for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() { + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Updating index `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })?
+ }; + + let mut index_wtxn = index_env.write_txn().with_context(|| { + format!( + "while obtaining a write transaction for index `{uid}` at `{}`", + index_path.display() + ) + })?; + + println!("\t- Updating index stats"); + update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?; + println!("\t- Updating date format"); + update_date_format(&uid, &index_env, &mut index_wtxn)?; + + index_wtxn.commit().with_context(|| { + format!("while committing the write txn for index `{uid}` at {}", index_path.display()) + })?; + } + + sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?; + + println!("Upgrading database succeeded"); + + Ok(()) +} diff --git a/crates/meilitool/src/upgrade/v1_11.rs b/crates/meilitool/src/upgrade/v1_11.rs new file mode 100644 index 000000000..0c84d3842 --- /dev/null +++ b/crates/meilitool/src/upgrade/v1_11.rs @@ -0,0 +1,85 @@ +//! The breaking changes that happened between the v1.10 and the v1.11 are: +//! - Arroy went from v0.4.0 to v0.5.0, see this release note to get the whole context: https://github.com/meilisearch/arroy/releases/tag/v0.5.0 +//! - The `angular` distance has been renamed to `cosine` => We only need to update the string in the metadata. +//! - Reorganize the `NodeId` to make the appending of vectors work => We'll have to update the keys of almost all items in the DB. +//! - Store the list of updated IDs directly in LMDB instead of a roaring bitmap => This shouldn't be an issue since we are never supposed to commit this roaring bitmap, but it's not forbidden by arroy so ensuring it works is probably better than anything. + +use std::path::Path; + +use anyhow::Context; +use meilisearch_types::{ + heed::{types::Str, Database, EnvOpenOptions}, + milli::index::db_name, +}; + +use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; + +pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { + println!("Upgrading from v1.10.0 to v1.11.0"); + + let index_scheduler_path = db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; + + let sched_rtxn = env.read_txn()?; + + let index_mapping: Database<Str, UuidCodec> = + try_opening_database(&env, &sched_rtxn, "index-mapping")?; + + let index_count = + index_mapping.len(&sched_rtxn).context("while reading the number of indexes")?; + + let indexes: Vec<_> = index_mapping + .iter(&sched_rtxn)? + .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) + .collect(); + + for (index_index, result) in indexes.into_iter().enumerate() { + let (uid, uuid) = result?; + let index_path = db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Updating embeddings for `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })?
+ }; + + let index_rtxn = index_env.read_txn().with_context(|| { + format!( + "while obtaining a read transaction for index {uid} at {}", + index_path.display() + ) + })?; + let index_read_database = + try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY) + .with_context(|| format!("while opening the vector database for index `{uid}`"))?; + + let mut index_wtxn = index_env.write_txn().with_context(|| { + format!( + "while obtaining a write transaction for index {uid} at {}", + index_path.display() + ) + })?; + + let index_write_database = + try_opening_poly_database(&index_env, &index_wtxn, db_name::VECTOR_ARROY) + .with_context(|| format!("while opening the vector database for index `{uid}`"))?; + + arroy_v04_to_v05::ugrade_from_prev_version( + &index_rtxn, + index_read_database, + &mut index_wtxn, + index_write_database, + )?; + + index_wtxn.commit()?; + } + + Ok(()) +}
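Before the vendored v1.9 types that follow, a brief sketch of the externally tagged JSON layout they accept (a hedged illustration; `serde_json` and the `sketch` function are used here purely for demonstration and are not part of the patch):

// serde's default enum representation is externally tagged, so a v1.9 embedder
// config is stored as {"UserProvided": {...}}, {"Rest": {...}}, and so on. This
// is what lets find_rest_embedders in v1_10.rs pattern-match EmbedderOptions::Rest.
fn sketch() -> serde_json::Result<()> {
    let raw = r#"{"name": "default", "config": {"embedder_options": {"UserProvided": {"dimensions": 768}}}}"#;
    let config: IndexEmbeddingConfig = serde_json::from_str(raw)?;
    assert!(matches!(config.config.embedder_options, EmbedderOptions::UserProvided(_)));
    Ok(())
}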
diff --git a/crates/meilitool/src/upgrade/v1_9.rs b/crates/meilitool/src/upgrade/v1_9.rs new file mode 100644 index 000000000..96cbfe68c --- /dev/null +++ b/crates/meilitool/src/upgrade/v1_9.rs @@ -0,0 +1,158 @@ +use serde::{Deserialize, Serialize}; +use time::{Date, OffsetDateTime, Time, UtcOffset}; + +pub type FieldDistribution = std::collections::BTreeMap<String, u64>; + +/// The statistics that can be computed from an `Index` object. +#[derive(serde::Deserialize, Debug)] +pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. + pub created_at: LegacyDateTime, + /// Date of the last update of the index. + pub updated_at: LegacyDateTime, +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct IndexEmbeddingConfig { + pub name: String, + pub config: EmbeddingConfig, +} + +#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] +pub struct EmbeddingConfig { + /// Options of the embedder, specific to each kind of embedder + pub embedder_options: EmbedderOptions, +} + +/// Options of an embedder, specific to each kind of embedder. +#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +pub enum EmbedderOptions { + HuggingFace(hf::EmbedderOptions), + OpenAi(openai::EmbedderOptions), + Ollama(ollama::EmbedderOptions), + UserProvided(manual::EmbedderOptions), + Rest(rest::EmbedderOptions), +} + +impl Default for EmbedderOptions { + fn default() -> Self { + Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None }) + } +} + +mod hf { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub model: String, + pub revision: Option<String>, + } +} +mod openai { + + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub api_key: Option<String>, + pub dimensions: Option<usize>, + } +} +mod ollama { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub embedding_model: String, + pub url: Option<String>, + pub api_key: Option<String>, + } +} +mod manual { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub dimensions: usize, + } +} +mod rest { + #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)] + pub struct EmbedderOptions { + pub api_key: Option<String>, + pub dimensions: Option<usize>, + pub url: String, + pub input_field: Vec<String>, + // path to the array of embeddings + pub path_to_embeddings: Vec<String>, + // shape of a single embedding + pub embedding_object: Vec<String>, + } +} + +/// A datetime from Meilisearch v1.9 with an unspecified format. +#[derive(Debug)] +pub struct LegacyDateTime(pub OffsetDateTime); + +impl<'de> Deserialize<'de> for LegacyDateTime { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + struct Visitor; + impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = OffsetDateTime; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(formatter, "a valid datetime") + } + + // Comes from a binary. The legacy format is: + // 2024-11-04 13:32:08.48368 +00:00:00 + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + let format = time::macros::format_description!("[year]-[month]-[day] [hour]:[minute]:[second].[subsecond] [offset_hour sign:mandatory]:[offset_minute]:[offset_second]"); + OffsetDateTime::parse(v, format).map_err(E::custom) + } + + // Comes from the docker image, the legacy format is: + // [2024, 309, 17, 15, 1, 698184971, 0,0,0] + // year, day in year, hour, minute, sec, subsec , offset stuff + fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> + where + A: serde::de::SeqAccess<'de>, + { + let mut vec = Vec::new(); + // We must deserialize the value as `i64` because the largest values are `u32` and `i32` + while let Some(el) = seq.next_element::<i64>()?
{ + vec.push(el); + } + if vec.len() != 9 { + return Err(serde::de::Error::custom(format!( + "Invalid datetime, received an array of {} elements instead of 9", + vec.len() + ))); + } + Ok(OffsetDateTime::new_in_offset( + Date::from_ordinal_date(vec[0] as i32, vec[1] as u16) + .map_err(serde::de::Error::custom)?, + Time::from_hms_nano(vec[2] as u8, vec[3] as u8, vec[4] as u8, vec[5] as u32) + .map_err(serde::de::Error::custom)?, + UtcOffset::from_hms(vec[6] as i8, vec[7] as i8, vec[8] as i8) + .map_err(serde::de::Error::custom)?, + )) + } + } + deserializer.deserialize_any(Visitor).map(LegacyDateTime) + } +} diff --git a/meilitool/src/uuid_codec.rs b/crates/meilitool/src/uuid_codec.rs similarity index 100% rename from meilitool/src/uuid_codec.rs rename to crates/meilitool/src/uuid_codec.rs diff --git a/milli/Cargo.toml b/crates/milli/Cargo.toml similarity index 90% rename from milli/Cargo.toml rename to crates/milli/Cargo.toml index 46633bdec..005393411 100644 --- a/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -16,7 +16,7 @@ big_s = "1.0.2" bimap = { version = "0.6.3", features = ["serde"] } bincode = "1.3.3" bstr = "1.9.1" -bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } +bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] } byteorder = "1.5.0" # charabia = { version = "0.9.0", default-features = false } charabia = { git = "https://github.com/meilisearch/charabia", branch = "mutualize-char-normalizer", default-features = false } @@ -88,16 +88,18 @@ rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838 "no_time", "sync", ] } -arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } +arroy = "0.5.0" rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.10.0", features = ["json"] } url = "2.5.2" rayon-par-bridge = "0.1.0" -hashbrown = "0.14.5" +hashbrown = "0.15.0" raw-collections = { git = "https://github.com/dureuill/raw-collections.git", version = "0.1.0" } bumpalo = "3.16.0" thread_local = "1.1.8" +allocator-api2 = "0.2.18" +rustc-hash = "2.0.0" [dev-dependencies] mimalloc = { version = "0.1.43", default-features = false } @@ -109,16 +111,7 @@ rand = { version = "0.8.5", features = ["small_rng"] } [features] all-tokenizations = [ - "charabia/chinese", - "charabia/hebrew", - "charabia/japanese", - "charabia/thai", - "charabia/korean", - "charabia/greek", - "charabia/khmer", - "charabia/vietnamese", - "charabia/swedish-recomposition", - "charabia/german-segmentation", + "charabia/default", ] # Use POSIX semaphores instead of SysV semaphores in LMDB @@ -157,5 +150,8 @@ german = ["charabia/german-segmentation"] # force swedish character recomposition swedish-recomposition = ["charabia/swedish-recomposition"] +# allow turkish specialized tokenization +turkish = ["charabia/turkish"] + # allow CUDA support, see cuda = ["candle-core/cuda"] diff --git a/milli/README.md b/crates/milli/README.md similarity index 100% rename from milli/README.md rename to crates/milli/README.md diff --git a/milli/examples/index.rs b/crates/milli/examples/index.rs similarity index 100% rename from milli/examples/index.rs rename to crates/milli/examples/index.rs diff --git a/milli/examples/search.rs b/crates/milli/examples/search.rs similarity index 100% rename from milli/examples/search.rs rename to crates/milli/examples/search.rs diff --git a/milli/examples/settings.rs b/crates/milli/examples/settings.rs similarity index 100% rename from milli/examples/settings.rs rename to 
crates/milli/examples/settings.rs diff --git a/milli/fuzz/.gitignore b/crates/milli/fuzz/.gitignore similarity index 100% rename from milli/fuzz/.gitignore rename to crates/milli/fuzz/.gitignore diff --git a/milli/src/asc_desc.rs b/crates/milli/src/asc_desc.rs similarity index 100% rename from milli/src/asc_desc.rs rename to crates/milli/src/asc_desc.rs diff --git a/milli/src/criterion.rs b/crates/milli/src/criterion.rs similarity index 100% rename from milli/src/criterion.rs rename to crates/milli/src/criterion.rs diff --git a/milli/src/documents/builder.rs b/crates/milli/src/documents/builder.rs similarity index 100% rename from milli/src/documents/builder.rs rename to crates/milli/src/documents/builder.rs diff --git a/milli/src/documents/enriched.rs b/crates/milli/src/documents/enriched.rs similarity index 100% rename from milli/src/documents/enriched.rs rename to crates/milli/src/documents/enriched.rs diff --git a/milli/src/documents/mod.rs b/crates/milli/src/documents/mod.rs similarity index 100% rename from milli/src/documents/mod.rs rename to crates/milli/src/documents/mod.rs diff --git a/milli/src/documents/primary_key.rs b/crates/milli/src/documents/primary_key.rs similarity index 97% rename from milli/src/documents/primary_key.rs rename to crates/milli/src/documents/primary_key.rs index 1831a2dbc..fb8b3d027 100644 --- a/milli/src/documents/primary_key.rs +++ b/crates/milli/src/documents/primary_key.rs @@ -279,12 +279,13 @@ fn starts_with(selector: &str, key: &str) -> bool { // FIXME: move to a DocumentId struct pub fn validate_document_id_str(document_id: &str) -> Option<&str> { - if !document_id.is_empty() - && document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_')) + if document_id.is_empty() + || document_id.len() > 512 + || !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') { - Some(document_id) - } else { None + } else { + Some(document_id) } } @@ -295,6 +296,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserError> { Some(s) => Ok(s.to_string()), None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }), }, + // a `u64` or `i64` cannot be more than 512 bytes once converted to a string Value::Number(number) if !number.is_f64() => Ok(number.to_string()), content => Err(UserError::InvalidDocumentId { document_id: content }), }
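Since the identifier rules just changed, a few illustrative checks of the new `validate_document_id_str` behavior (a hypothetical test, not part of the patch):

#[test]
fn document_id_rules_sketch() {
    assert_eq!(validate_document_id_str("doc-42_a"), Some("doc-42_a"));
    assert_eq!(validate_document_id_str(""), None); // empty ids are rejected
    assert_eq!(validate_document_id_str("doc 42"), None); // only [a-zA-Z0-9_-] is allowed
    assert_eq!(validate_document_id_str(&"x".repeat(513)), None); // more than 512 bytes
}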
diff --git a/milli/src/documents/reader.rs b/crates/milli/src/documents/reader.rs similarity index 100% rename from milli/src/documents/reader.rs rename to crates/milli/src/documents/reader.rs diff --git a/milli/src/documents/serde_impl.rs b/crates/milli/src/documents/serde_impl.rs similarity index 100% rename from milli/src/documents/serde_impl.rs rename to crates/milli/src/documents/serde_impl.rs diff --git a/milli/src/error.rs b/crates/milli/src/error.rs similarity index 99% rename from milli/src/error.rs rename to crates/milli/src/error.rs index e6856c4ef..53d9827ac 100644 --- a/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -106,7 +106,8 @@ pub enum UserError { #[error( "Document identifier `{}` is invalid. \ A document identifier can be of type integer or string, \ -only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", .document_id.to_string() +only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \ +and can not be more than 512 bytes.", .document_id.to_string() )] InvalidDocumentId { document_id: Value }, #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))] @@ -297,6 +298,7 @@ impl From<arroy::Error> for Error { arroy::Error::InvalidVecDimension { expected, received } => { Error::UserError(UserError::InvalidVectorDimensions { expected, found: received }) } + arroy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation), arroy::Error::DatabaseFull | arroy::Error::InvalidItemAppend | arroy::Error::UnmatchingDistance { .. } diff --git a/milli/src/external_documents_ids.rs b/crates/milli/src/external_documents_ids.rs similarity index 100% rename from milli/src/external_documents_ids.rs rename to crates/milli/src/external_documents_ids.rs diff --git a/milli/src/facet/facet_type.rs b/crates/milli/src/facet/facet_type.rs similarity index 100% rename from milli/src/facet/facet_type.rs rename to crates/milli/src/facet/facet_type.rs diff --git a/milli/src/facet/facet_value.rs b/crates/milli/src/facet/facet_value.rs similarity index 100% rename from milli/src/facet/facet_value.rs rename to crates/milli/src/facet/facet_value.rs diff --git a/milli/src/facet/mod.rs b/crates/milli/src/facet/mod.rs similarity index 100% rename from milli/src/facet/mod.rs rename to crates/milli/src/facet/mod.rs diff --git a/milli/src/facet/value_encoding.rs b/crates/milli/src/facet/value_encoding.rs similarity index 100% rename from milli/src/facet/value_encoding.rs rename to crates/milli/src/facet/value_encoding.rs diff --git a/milli/src/fieldids_weights_map.rs b/crates/milli/src/fieldids_weights_map.rs similarity index 100% rename from milli/src/fieldids_weights_map.rs rename to crates/milli/src/fieldids_weights_map.rs diff --git a/milli/src/fields_ids_map.rs b/crates/milli/src/fields_ids_map.rs similarity index 100% rename from milli/src/fields_ids_map.rs rename to crates/milli/src/fields_ids_map.rs diff --git a/milli/src/fields_ids_map/global.rs b/crates/milli/src/fields_ids_map/global.rs similarity index 100% rename from milli/src/fields_ids_map/global.rs rename to crates/milli/src/fields_ids_map/global.rs diff --git a/milli/src/fields_ids_map/metadata.rs b/crates/milli/src/fields_ids_map/metadata.rs similarity index 100% rename from milli/src/fields_ids_map/metadata.rs rename to crates/milli/src/fields_ids_map/metadata.rs diff --git a/milli/src/heed_codec/beu16_str_codec.rs b/crates/milli/src/heed_codec/beu16_str_codec.rs similarity index 100% rename from milli/src/heed_codec/beu16_str_codec.rs rename to crates/milli/src/heed_codec/beu16_str_codec.rs diff --git a/milli/src/heed_codec/beu32_str_codec.rs b/crates/milli/src/heed_codec/beu32_str_codec.rs similarity index 100% rename from milli/src/heed_codec/beu32_str_codec.rs rename to crates/milli/src/heed_codec/beu32_str_codec.rs diff --git a/milli/src/heed_codec/byte_slice_ref.rs b/crates/milli/src/heed_codec/byte_slice_ref.rs similarity index 100% rename from milli/src/heed_codec/byte_slice_ref.rs rename to crates/milli/src/heed_codec/byte_slice_ref.rs diff --git a/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs b/crates/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs similarity index 100% rename from
milli/src/heed_codec/facet/field_doc_id_facet_codec.rs rename to crates/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs diff --git a/milli/src/heed_codec/facet/mod.rs b/crates/milli/src/heed_codec/facet/mod.rs similarity index 100% rename from milli/src/heed_codec/facet/mod.rs rename to crates/milli/src/heed_codec/facet/mod.rs diff --git a/milli/src/heed_codec/facet/ordered_f64_codec.rs b/crates/milli/src/heed_codec/facet/ordered_f64_codec.rs similarity index 100% rename from milli/src/heed_codec/facet/ordered_f64_codec.rs rename to crates/milli/src/heed_codec/facet/ordered_f64_codec.rs diff --git a/milli/src/heed_codec/field_id_word_count_codec.rs b/crates/milli/src/heed_codec/field_id_word_count_codec.rs similarity index 100% rename from milli/src/heed_codec/field_id_word_count_codec.rs rename to crates/milli/src/heed_codec/field_id_word_count_codec.rs diff --git a/milli/src/heed_codec/fst_set_codec.rs b/crates/milli/src/heed_codec/fst_set_codec.rs similarity index 100% rename from milli/src/heed_codec/fst_set_codec.rs rename to crates/milli/src/heed_codec/fst_set_codec.rs diff --git a/milli/src/heed_codec/mod.rs b/crates/milli/src/heed_codec/mod.rs similarity index 100% rename from milli/src/heed_codec/mod.rs rename to crates/milli/src/heed_codec/mod.rs diff --git a/milli/src/heed_codec/obkv_codec.rs b/crates/milli/src/heed_codec/obkv_codec.rs similarity index 100% rename from milli/src/heed_codec/obkv_codec.rs rename to crates/milli/src/heed_codec/obkv_codec.rs diff --git a/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs similarity index 100% rename from milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs rename to crates/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs similarity index 100% rename from milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs rename to crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs diff --git a/milli/src/heed_codec/roaring_bitmap/mod.rs b/crates/milli/src/heed_codec/roaring_bitmap/mod.rs similarity index 100% rename from milli/src/heed_codec/roaring_bitmap/mod.rs rename to crates/milli/src/heed_codec/roaring_bitmap/mod.rs diff --git a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs similarity index 100% rename from milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs rename to crates/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs diff --git a/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs similarity index 100% rename from milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs rename to crates/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs diff --git a/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs similarity index 100% rename from milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs rename to crates/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs diff --git a/milli/src/heed_codec/roaring_bitmap_length/mod.rs 
b/crates/milli/src/heed_codec/roaring_bitmap_length/mod.rs similarity index 100% rename from milli/src/heed_codec/roaring_bitmap_length/mod.rs rename to crates/milli/src/heed_codec/roaring_bitmap_length/mod.rs diff --git a/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs similarity index 100% rename from milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs rename to crates/milli/src/heed_codec/roaring_bitmap_length/roaring_bitmap_len_codec.rs diff --git a/milli/src/heed_codec/str_beu32_codec.rs b/crates/milli/src/heed_codec/str_beu32_codec.rs similarity index 100% rename from milli/src/heed_codec/str_beu32_codec.rs rename to crates/milli/src/heed_codec/str_beu32_codec.rs diff --git a/milli/src/heed_codec/str_ref.rs b/crates/milli/src/heed_codec/str_ref.rs similarity index 100% rename from milli/src/heed_codec/str_ref.rs rename to crates/milli/src/heed_codec/str_ref.rs diff --git a/milli/src/heed_codec/str_str_u8_codec.rs b/crates/milli/src/heed_codec/str_str_u8_codec.rs similarity index 100% rename from milli/src/heed_codec/str_str_u8_codec.rs rename to crates/milli/src/heed_codec/str_str_u8_codec.rs diff --git a/milli/src/index.rs b/crates/milli/src/index.rs similarity index 98% rename from milli/src/index.rs rename to crates/milli/src/index.rs index 19064e8d7..08a8e36f8 100644 --- a/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1618,24 +1618,6 @@ impl Index { .unwrap_or_default()) } - pub fn arroy_readers<'a>( - &'a self, - rtxn: &'a RoTxn<'a>, - embedder_id: u8, - quantized: bool, - ) -> impl Iterator<Item = Result<ArroyWrapper>> + 'a { - crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { - let reader = ArroyWrapper::new(self.vector_arroy, k, quantized); - // Here we don't care about the dimensions, but we want to know if we can read - // in the database or if its metadata are missing because there is no document with that many vectors.
- match reader.dimensions(rtxn) { - Ok(_) => Some(Ok(reader)), - Err(arroy::Error::MissingMetadata(_)) => None, - Err(e) => Some(Err(e.into())), - } - }) - } - pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff) } @@ -1657,14 +1639,9 @@ impl Index { let embedding_configs = self.embedding_configs(rtxn)?; for config in embedding_configs { let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); - let embeddings = self - .arroy_readers(rtxn, embedder_id, config.config.quantized()) - .map_while(|reader| { - reader - .and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into())) - .transpose() - }) - .collect::<Result<Vec<_>>>()?; + let reader = + ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized()); + let embeddings = reader.item_vectors(rtxn, docid)?; res.insert(config.name.to_owned(), embeddings); } Ok(res) diff --git a/milli/src/lib.rs b/crates/milli/src/lib.rs similarity index 100% rename from milli/src/lib.rs rename to crates/milli/src/lib.rs diff --git a/milli/src/localized_attributes_rules.rs b/crates/milli/src/localized_attributes_rules.rs similarity index 100% rename from milli/src/localized_attributes_rules.rs rename to crates/milli/src/localized_attributes_rules.rs diff --git a/milli/src/order_by_map.rs b/crates/milli/src/order_by_map.rs similarity index 100% rename from milli/src/order_by_map.rs rename to crates/milli/src/order_by_map.rs diff --git a/milli/src/prompt/context.rs b/crates/milli/src/prompt/context.rs similarity index 100% rename from milli/src/prompt/context.rs rename to crates/milli/src/prompt/context.rs diff --git a/milli/src/prompt/document.rs b/crates/milli/src/prompt/document.rs similarity index 100% rename from milli/src/prompt/document.rs rename to crates/milli/src/prompt/document.rs diff --git a/milli/src/prompt/error.rs b/crates/milli/src/prompt/error.rs similarity index 100% rename from milli/src/prompt/error.rs rename to crates/milli/src/prompt/error.rs diff --git a/milli/src/prompt/fields.rs b/crates/milli/src/prompt/fields.rs similarity index 100% rename from milli/src/prompt/fields.rs rename to crates/milli/src/prompt/fields.rs diff --git a/milli/src/prompt/mod.rs b/crates/milli/src/prompt/mod.rs similarity index 100% rename from milli/src/prompt/mod.rs rename to crates/milli/src/prompt/mod.rs diff --git a/milli/src/prompt/template_checker.rs b/crates/milli/src/prompt/template_checker.rs similarity index 100% rename from milli/src/prompt/template_checker.rs rename to crates/milli/src/prompt/template_checker.rs diff --git a/milli/src/proximity.rs b/crates/milli/src/proximity.rs similarity index 100% rename from milli/src/proximity.rs rename to crates/milli/src/proximity.rs diff --git a/milli/src/score_details.rs b/crates/milli/src/score_details.rs similarity index 100% rename from milli/src/score_details.rs rename to crates/milli/src/score_details.rs diff --git a/milli/src/search/facet/facet_distribution.rs b/crates/milli/src/search/facet/facet_distribution.rs similarity index 100% rename from milli/src/search/facet/facet_distribution.rs rename to crates/milli/src/search/facet/facet_distribution.rs diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/crates/milli/src/search/facet/facet_distribution_iter.rs similarity index 100% rename from milli/src/search/facet/facet_distribution_iter.rs rename to crates/milli/src/search/facet/facet_distribution_iter.rs diff --git
a/milli/src/search/facet/facet_range_search.rs b/crates/milli/src/search/facet/facet_range_search.rs similarity index 100% rename from milli/src/search/facet/facet_range_search.rs rename to crates/milli/src/search/facet/facet_range_search.rs diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/crates/milli/src/search/facet/facet_sort_ascending.rs similarity index 100% rename from milli/src/search/facet/facet_sort_ascending.rs rename to crates/milli/src/search/facet/facet_sort_ascending.rs diff --git a/milli/src/search/facet/facet_sort_descending.rs b/crates/milli/src/search/facet/facet_sort_descending.rs similarity index 100% rename from milli/src/search/facet/facet_sort_descending.rs rename to crates/milli/src/search/facet/facet_sort_descending.rs diff --git a/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs similarity index 100% rename from milli/src/search/facet/filter.rs rename to crates/milli/src/search/facet/filter.rs diff --git a/milli/src/search/facet/mod.rs b/crates/milli/src/search/facet/mod.rs similarity index 100% rename from milli/src/search/facet/mod.rs rename to crates/milli/src/search/facet/mod.rs diff --git a/milli/src/search/facet/search.rs b/crates/milli/src/search/facet/search.rs similarity index 100% rename from milli/src/search/facet/search.rs rename to crates/milli/src/search/facet/search.rs diff --git a/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all/0.snap b/crates/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all/0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all/0.snap rename to crates/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all/0.snap diff --git a/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all/1.snap b/crates/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all/1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all/1.snap rename to crates/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all/1.snap diff --git a/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all_stop_early/0.snap b/crates/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all_stop_early/0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all_stop_early/0.snap rename to crates/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all_stop_early/0.snap diff --git a/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all_stop_early/1.snap b/crates/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all_stop_early/1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all_stop_early/1.snap rename to crates/milli/src/search/facet/snapshots/facet_distribution_iter.rs/filter_distribution_all_stop_early/1.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_0.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_0.hash.snap similarity index 100% rename from 
milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_0.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_0.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_1.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_1.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_1.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_1.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_2.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_2.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_3.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/excluded_3.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_0.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_0.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_0.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_0.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_1.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_1.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_1.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_1.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_2.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_2.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_3.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_decreasing/included_3.hash.snap diff --git 
a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_0.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_0.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_0.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_0.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_1.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_1.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_1.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_1.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_2.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_2.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_3.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_0_exact_3.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_0.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_0.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_0.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_1.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_1.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_1.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_2.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_2.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_3.hash.snap 
b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_exact/field_id_1_exact_3.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_0.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_0.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_0.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_0.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_1.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_1.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_1.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_1.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_2.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_2.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_3.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/excluded_3.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_0.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_0.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_0.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_0.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_1.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_1.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_1.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_1.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_2.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_2.hash.snap similarity index 100% rename from 
milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_3.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_increasing/included_3.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_0.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_0.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_0.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_0.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_1.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_1.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_1.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_1.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_2.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_2.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_3.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/excluded_3.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_0.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_0.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_0.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_0.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_1.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_1.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_1.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_1.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_2.hash.snap 
b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_2.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_3.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_pinch/included_3.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_0.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_0.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_0.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_0.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_1.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_1.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_1.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_1.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_2.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_2.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_3.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/end_at_included_3.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_0.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_0.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_0.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_0.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_1.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_1.hash.snap similarity 
index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_1.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_1.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_2.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_2.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_2.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_2.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_3.hash.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_3.hash.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_3.hash.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/start_from_included_3.hash.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_0.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_0.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_0.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_1.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_1.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_1.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_2.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_2.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_2.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_2.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_3.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_3.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_3.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_0_3.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_0.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_0.snap similarity index 100% rename from 
milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_0.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_0.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_1.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_1.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_1.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_2.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_2.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_2.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_2.snap diff --git a/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_3.snap b/crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_3.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_3.snap rename to crates/milli/src/search/facet/snapshots/facet_range_search.rs/filter_range_unbounded/unbounded_field_id_1_3.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending/0.snap b/crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending/0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending/0.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending/0.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending/1.snap b/crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending/1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending/1.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending/1.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/0-0.snap b/crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/0-0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/0-0.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/0-0.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/0-1.snap b/crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/0-1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/0-1.snap rename to 
crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/0-1.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/1-0.snap b/crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/1-0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/1-0.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/1-0.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/1-1.snap b/crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/1-1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/1-1.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_ascending.rs/filter_sort_ascending_multiple_field_ids/1-1.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/0.snap b/crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/0.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/0.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/1.snap b/crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/1.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/1.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/2.snap b/crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/2.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/2.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending/2.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/0-0.snap b/crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/0-0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/0-0.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/0-0.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/0-1.snap b/crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/0-1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/0-1.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/0-1.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/1-0.snap 
b/crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/1-0.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/1-0.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/1-0.snap diff --git a/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/1-1.snap b/crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/1-1.snap similarity index 100% rename from milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/1-1.snap rename to crates/milli/src/search/facet/snapshots/facet_sort_descending.rs/filter_sort_descending_multiple_field_ids/1-1.snap diff --git a/milli/src/search/fst_utils.rs b/crates/milli/src/search/fst_utils.rs similarity index 100% rename from milli/src/search/fst_utils.rs rename to crates/milli/src/search/fst_utils.rs diff --git a/milli/src/search/hybrid.rs b/crates/milli/src/search/hybrid.rs similarity index 100% rename from milli/src/search/hybrid.rs rename to crates/milli/src/search/hybrid.rs diff --git a/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs similarity index 100% rename from milli/src/search/mod.rs rename to crates/milli/src/search/mod.rs diff --git a/milli/src/search/new/bucket_sort.rs b/crates/milli/src/search/new/bucket_sort.rs similarity index 100% rename from milli/src/search/new/bucket_sort.rs rename to crates/milli/src/search/new/bucket_sort.rs diff --git a/milli/src/search/new/db_cache.rs b/crates/milli/src/search/new/db_cache.rs similarity index 100% rename from milli/src/search/new/db_cache.rs rename to crates/milli/src/search/new/db_cache.rs diff --git a/milli/src/search/new/distinct.rs b/crates/milli/src/search/new/distinct.rs similarity index 100% rename from milli/src/search/new/distinct.rs rename to crates/milli/src/search/new/distinct.rs diff --git a/milli/src/search/new/exact_attribute.rs b/crates/milli/src/search/new/exact_attribute.rs similarity index 100% rename from milli/src/search/new/exact_attribute.rs rename to crates/milli/src/search/new/exact_attribute.rs diff --git a/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs similarity index 100% rename from milli/src/search/new/geo_sort.rs rename to crates/milli/src/search/new/geo_sort.rs diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/crates/milli/src/search/new/graph_based_ranking_rule.rs similarity index 100% rename from milli/src/search/new/graph_based_ranking_rule.rs rename to crates/milli/src/search/new/graph_based_ranking_rule.rs diff --git a/milli/src/search/new/interner.rs b/crates/milli/src/search/new/interner.rs similarity index 100% rename from milli/src/search/new/interner.rs rename to crates/milli/src/search/new/interner.rs diff --git a/milli/src/search/new/limits.rs b/crates/milli/src/search/new/limits.rs similarity index 100% rename from milli/src/search/new/limits.rs rename to crates/milli/src/search/new/limits.rs diff --git a/milli/src/search/new/logger/mod.rs b/crates/milli/src/search/new/logger/mod.rs similarity index 100% rename from milli/src/search/new/logger/mod.rs rename to crates/milli/src/search/new/logger/mod.rs diff --git a/milli/src/search/new/logger/visual.rs b/crates/milli/src/search/new/logger/visual.rs similarity index 100% rename from 
milli/src/search/new/logger/visual.rs rename to crates/milli/src/search/new/logger/visual.rs diff --git a/crates/milli/src/search/new/matches/best_match_interval.rs b/crates/milli/src/search/new/matches/best_match_interval.rs new file mode 100644 index 000000000..a6497f351 --- /dev/null +++ b/crates/milli/src/search/new/matches/best_match_interval.rs @@ -0,0 +1,139 @@ +use super::matching_words::WordId; +use super::{Match, MatchPosition}; + +struct MatchIntervalWithScore { + interval: [usize; 2], + score: [i16; 3], +} + +// count score for phrases +fn tally_phrase_scores(fwp: &usize, lwp: &usize, order_score: &mut i16, distance_score: &mut i16) { + let words_in_phrase_minus_one = (lwp - fwp) as i16; + // will always be ordered, so +1 for each space between words + *order_score += words_in_phrase_minus_one; + // distance will always be 1, so -1 for each space between words + *distance_score -= words_in_phrase_minus_one; +} + +/// Compute the score of a match interval: +/// 1) count unique matches +/// 2) calculate distance between matches +/// 3) count ordered matches +fn get_interval_score(matches: &[Match]) -> [i16; 3] { + let mut ids: Vec<WordId> = Vec::with_capacity(matches.len()); + let mut order_score = 0; + let mut distance_score = 0; + + let mut iter = matches.iter().peekable(); + while let Some(m) = iter.next() { + if let Some(next_match) = iter.peek() { + // if matches are ordered + if next_match.ids.iter().min() > m.ids.iter().min() { + order_score += 1; + } + + let m_last_word_pos = match m.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: [fwp, lwp], .. } => { + tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); + lwp + } + }; + let next_match_first_word_pos = next_match.get_first_word_pos(); + + // compute distance between matches + distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16; + } else if let MatchPosition::Phrase { word_positions: [fwp, lwp], .. } = m.position { + // in case last match is a phrase, count score for its words + tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score); + } + + ids.extend(m.ids.iter()); + } + + ids.sort_unstable(); + ids.dedup(); + let uniq_score = ids.len() as i16; + + // rank by unique match count, then by distance between matches, then by ordered match count. + [uniq_score, distance_score, order_score] +} + +/// Returns the first and last match where the score computed by get_interval_score is the best. +pub fn find_best_match_interval(matches: &[Match], crop_size: usize) -> [&Match; 2] { + if matches.is_empty() { + panic!("`matches` should not be empty at this point"); + } + + // positions of the first and the last match of the best matches interval in `matches`. + let mut best_interval: Option<MatchIntervalWithScore> = None; + + let mut save_best_interval = |interval_first, interval_last| { + let interval_score = get_interval_score(&matches[interval_first..=interval_last]); + let is_interval_score_better = &best_interval + .as_ref() + .map_or(true, |MatchIntervalWithScore { score, .. }| interval_score > *score); + + if *is_interval_score_better { + best_interval = Some(MatchIntervalWithScore { + interval: [interval_first, interval_last], + score: interval_score, + }); + } + }; + + // we compute the matches interval if we have at least 2 matches. // current interval positions.
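One detail worth noting in `get_interval_score` above: the score moved from a `(i16, i16, i16)` tuple (see the removed `match_interval_score` further down) to an `[i16; 3]` array, and the `interval_score > *score` comparison in `save_best_interval` relies on arrays comparing lexicographically, exactly as tuples do. A tiny self-contained check with made-up scores:

```rust
// Arrays of `Ord` elements compare lexicographically, just like tuples, so
// `[uniq_score, distance_score, order_score]` ranks by unique matches first,
// then distance, then order. Toy values, not taken from the diff:
fn main() {
    let a: [i16; 3] = [2, -3, 0]; // 2 unique matches, closer together
    let b: [i16; 3] = [2, -5, 1]; // 2 unique matches, farther apart
    assert!(a > b); // distance breaks the tie before order is consulted
    assert!([3, -9, 0] > a); // more unique matches wins outright
    println!("lexicographic ordering confirmed");
}
```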
+ let mut interval_first = 0; + let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); + + for (index, next_match) in matches.iter().enumerate() { + // if next match would make interval grow more than crop_size, + // we compare the current interval with the best one, + // then we increase `interval_first` until next match can be added. + let next_match_last_word_pos = next_match.get_last_word_pos(); + + // if the next match would mean that we pass the crop size window, + // we take the last valid match that didn't pass this boundary, which is `index` - 1, + // and calculate a score for it, and check if it's better than our best so far + if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size { + // if index is 0 there is no last viable match + if index != 0 { + let interval_last = index - 1; + // keep interval if it's the best + save_best_interval(interval_first, interval_last); + } + + // advance start of the interval while interval is longer than crop_size. + loop { + interval_first += 1; + if interval_first == matches.len() { + interval_first -= 1; + break; + } + + interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos(); + + if interval_first_match_first_word_pos > next_match_last_word_pos + || next_match_last_word_pos - interval_first_match_first_word_pos < crop_size + { + break; + } + } + } + } + + // compute the last interval score and compare it to the best one. + let interval_last = matches.len() - 1; + // if the interval is a single match, we need to make sure it's + // not a phrase longer than the crop window + if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size { + save_best_interval(interval_first, interval_last); + } + + // if none of the matches fit the criteria above, default to the first one + best_interval.map_or( + [&matches[0], &matches[0]], + |MatchIntervalWithScore { interval: [first, last], .. }| [&matches[first], &matches[last]], + ) +} diff --git a/crates/milli/src/search/new/matches/match.rs b/crates/milli/src/search/new/matches/match.rs new file mode 100644 index 000000000..2eef4d5a6 --- /dev/null +++ b/crates/milli/src/search/new/matches/match.rs @@ -0,0 +1,62 @@ +use super::matching_words::WordId; + +#[derive(Clone, Debug)] +pub enum MatchPosition { + Word { + // position of the word in the whole text. + word_position: usize, + // position of the token in the whole text. + token_position: usize, + }, + Phrase { + // position of the first and last word in the phrase in the whole text. + word_positions: [usize; 2], + // position of the first and last token in the phrase in the whole text. + token_positions: [usize; 2], + }, +} + +#[derive(Clone, Debug)] +pub struct Match { + pub char_count: usize, + // ids of the query words that match. + pub ids: Vec<WordId>, + pub position: MatchPosition, +} + +impl Match { + pub(super) fn get_first_word_pos(&self) -> usize { + match self.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: [fwp, _], .. } => fwp, + } + } + + pub(super) fn get_last_word_pos(&self) -> usize { + match self.position { + MatchPosition::Word { word_position, .. } => word_position, + MatchPosition::Phrase { word_positions: [_, lwp], .. } => lwp, + } + } + + pub(super) fn get_first_token_pos(&self) -> usize { + match self.position { + MatchPosition::Word { token_position, .. } => token_position, + MatchPosition::Phrase { token_positions: [ftp, _], ..
} => ftp, + } + } + + pub(super) fn get_last_token_pos(&self) -> usize { + match self.position { + MatchPosition::Word { token_position, .. } => token_position, + MatchPosition::Phrase { token_positions: [_, ltp], .. } => ltp, + } + } + + pub(super) fn get_word_count(&self) -> usize { + match self.position { + MatchPosition::Word { .. } => 1, + MatchPosition::Phrase { word_positions: [fwp, lwp], .. } => lwp - fwp + 1, + } + } +} diff --git a/milli/src/search/new/matches/matching_words.rs b/crates/milli/src/search/new/matches/matching_words.rs similarity index 91% rename from milli/src/search/new/matches/matching_words.rs rename to crates/milli/src/search/new/matches/matching_words.rs index 4ad5c37ec..1f30a17ad 100644 --- a/milli/src/search/new/matches/matching_words.rs +++ b/crates/milli/src/search/new/matches/matching_words.rs @@ -86,14 +86,17 @@ impl MatchingWords { continue; }; let prefix_length = char_index + c.len_utf8(); - let char_len = token.original_lengths(prefix_length).0; + let (char_count, byte_len) = token.original_lengths(prefix_length); let ids = &located_words.positions; - return Some(MatchType::Full { char_len, ids }); + return Some(MatchType::Full { ids, char_count, byte_len }); // else we exact match the token. } else if token.lemma() == word { - let char_len = token.char_end - token.char_start; let ids = &located_words.positions; - return Some(MatchType::Full { char_len, ids }); + return Some(MatchType::Full { + char_count: token.char_end - token.char_start, + byte_len: token.byte_end - token.byte_start, + ids, + }); } } } @@ -130,7 +133,7 @@ impl<'a> Iterator for MatchesIter<'a, '_> { word.map(|word| self.matching_words.word_interner.get(word).as_str()) }) .collect(); - let partial = PartialMatch { matching_words: words, ids, char_len: 0 }; + let partial = PartialMatch { matching_words: words, ids }; partial.match_token(self.token).or_else(|| self.next()) } @@ -149,7 +152,7 @@ pub type WordId = u16; /// In these cases we need to match consecutively several tokens to consider that the match is full. #[derive(Debug, PartialEq)] pub enum MatchType<'a> { - Full { char_len: usize, ids: &'a RangeInclusive<WordId> }, + Full { char_count: usize, byte_len: usize, ids: &'a RangeInclusive<WordId> }, Partial(PartialMatch<'a>), } @@ -158,7 +161,6 @@ pub enum MatchType<'a> { pub struct PartialMatch<'a> { matching_words: Vec<Option<&'a str>>, ids: &'a RangeInclusive<WordId>, - char_len: usize, } impl<'a> PartialMatch<'a> { @@ -176,25 +178,24 @@ impl<'a> PartialMatch<'a> { None => token.is_stopword(), }; - let char_len = token.char_end - token.char_start; // if there are remaining words to match in the phrase and the current token is matching, // return a new Partial match allowing the highlighter to continue. if is_matching && matching_words.len() > 1 { matching_words.remove(0); - Some(MatchType::Partial(PartialMatch { matching_words, ids, char_len })) + Some(MatchType::Partial(Self { matching_words, ids })) // if there is no remaining word to match in the phrase and the current token is matching, // return a Full match. } else if is_matching { - Some(MatchType::Full { char_len, ids }) + Some(MatchType::Full { + char_count: token.char_end - token.char_start, + byte_len: token.byte_end - token.byte_start, + ids, + }) // if the current token doesn't match, return None to break the match sequence.
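The matching_words.rs hunk above replaces the single `char_len` with separate `char_count` and `byte_len` fields. The distinction matters because the two diverge on any non-ASCII text, and slicing a Rust string with a character count as if it were a byte offset is incorrect. A minimal illustration (example string mine, not from the diff):

```rust
// Why carry both a char count and a byte length: they differ as soon as the
// text is not pure ASCII.
fn main() {
    let word = "café";
    assert_eq!(word.chars().count(), 4); // char_count
    assert_eq!(word.len(), 5);           // byte_len: 'é' is 2 bytes in UTF-8
    // Slicing must use byte offsets; using the char count as a byte index
    // would cut "café" in the middle of 'é' and panic.
    assert_eq!(&word[..3], "caf");
}
```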
} else { None } } - - pub fn char_len(&self) -> usize { - self.char_len - } } impl fmt::Debug for MatchingWords { @@ -276,7 +277,7 @@ pub(crate) mod tests { ..Default::default() }) .next(), - Some(MatchType::Full { char_len: 5, ids: &(0..=0) }) + Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(0..=0) }) ); assert_eq!( matching_words @@ -300,7 +301,7 @@ ..Default::default() }) .next(), - Some(MatchType::Full { char_len: 5, ids: &(2..=2) }) + Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(2..=2) }) ); assert_eq!( matching_words @@ -312,7 +313,7 @@ ..Default::default() }) .next(), - Some(MatchType::Full { char_len: 5, ids: &(2..=2) }) + Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(2..=2) }) ); assert_eq!( matching_words diff --git a/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs similarity index 74% rename from milli/src/search/new/matches/mod.rs rename to crates/milli/src/search/new/matches/mod.rs index 4688b8f32..80e3ec7b2 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/crates/milli/src/search/new/matches/mod.rs @@ -1,11 +1,19 @@ -use std::borrow::Cow; +mod best_match_interval; +mod r#match; +mod matching_words; +mod simple_token_kind; use charabia::{Language, SeparatorKind, Token, Tokenizer}; +use either::Either; pub use matching_words::MatchingWords; -use matching_words::{MatchType, PartialMatch, WordId}; +use matching_words::{MatchType, PartialMatch}; +use r#match::{Match, MatchPosition}; use serde::Serialize; - -pub mod matching_words; +use simple_token_kind::SimpleTokenKind; +use std::{ + borrow::Cow, + cmp::{max, min}, +}; const DEFAULT_CROP_MARKER: &str = "…"; const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>"; @@ -93,17 +101,6 @@ impl FormatOptions { } } -#[derive(Clone, Debug)] -pub struct Match { - match_len: usize, - // ids of the query words that matches. - ids: Vec<WordId>, - // position of the word in the whole text. - word_position: usize, - // position of the token in the whole text. - token_position: usize, -} - #[derive(Serialize, Debug, Clone, PartialEq, Eq)] pub struct MatchBounds { pub start: usize, @@ -130,41 +127,27 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { /// compute_partial_match peek into next words to validate if the match is complete. fn compute_partial_match<'a>( mut partial: PartialMatch<'a>, - token_position: usize, - word_position: usize, + first_token_position: usize, + first_word_position: usize, + first_word_char_start: &usize, words_positions: &mut impl Iterator<Item = (usize, usize, Token<'a>)>, matches: &mut Vec<Match>, ) -> bool { - let mut potential_matches = vec![(token_position, word_position, partial.char_len())]; - for (token_position, word_position, word) in words_positions { partial = match partial.match_token(word) { // token matches the partial match, but the match is not full, // we temporarily save the current token then we try to match the next one. - Some(MatchType::Partial(partial)) => { - potential_matches.push((token_position, word_position, partial.char_len())); - partial - } + Some(MatchType::Partial(partial)) => partial, // partial match is now full, we keep this matches and we advance positions - Some(MatchType::Full { char_len, ids }) => { - let ids: Vec<_> = ids.clone().collect(); - // save previously matched tokens as matches.
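The surrounding hunk changes how phrase matches are recorded: the removed lines below buffer `potential_matches` and emit one `Match` per token of the phrase, while the added lines emit a single `Match` whose `MatchPosition::Phrase` carries the first and last positions. A toy contrast of the two shapes (types simplified, not milli's):

```rust
// Old shape vs. new shape for a phrase match, with stand-in types.
#[derive(Debug)]
enum Position {
    Word { word: usize },
    Phrase { words: [usize; 2] },
}

fn main() {
    // Old: "new york" matched at word positions 4 and 5 -> two entries.
    let old = vec![Position::Word { word: 4 }, Position::Word { word: 5 }];
    // New: one entry carrying the whole span.
    let new = Position::Phrase { words: [4, 5] };
    println!("{old:?} vs {new:?}");
}
```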
- let iter = potential_matches.into_iter().map( - |(token_position, word_position, match_len)| Match { - match_len, - ids: ids.clone(), - word_position, - token_position, - }, - ); - matches.extend(iter); - + Some(MatchType::Full { ids, .. }) => { // save the token that closes the partial match as a match. matches.push(Match { - match_len: char_len, - ids, - word_position, - token_position, + char_count: word.char_end - *first_word_char_start, + ids: ids.clone().collect(), + position: MatchPosition::Phrase { + word_positions: [first_word_position, word_position], + token_positions: [first_token_position, token_position], + }, }); // the match is complete, we return true. @@ -202,13 +185,12 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { match match_type { // we match, we save the current token as a match, // then we continue the rest of the tokens. - MatchType::Full { char_len, ids } => { + MatchType::Full { ids, char_count, .. } => { let ids: Vec<_> = ids.clone().collect(); matches.push(Match { - match_len: char_len, + char_count, ids, - word_position, - token_position, + position: MatchPosition::Word { word_position, token_position }, }); break; } @@ -221,6 +203,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { partial, token_position, word_position, + &word.char_start, &mut wp, &mut matches, ) { @@ -243,56 +226,99 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { Some((tokens, matches)) => matches .iter() .map(|m| MatchBounds { - start: tokens[m.token_position].byte_start, - length: m.match_len, + start: tokens[m.get_first_token_pos()].byte_start, + // TODO: Why is this in chars, while start is in bytes? + length: m.char_count, }) .collect(), } } /// Returns the bounds in byte index of the crop window. - fn crop_bounds( - &self, - tokens: &[Token<'_>], - matches: &[Match], - crop_size: usize, - ) -> (usize, usize) { - // if there is no match, we start from the beginning of the string by default. - let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0); - let first_match_token_position = matches.first().map(|m| m.token_position).unwrap_or(0); - let last_match_word_position = matches.last().map(|m| m.word_position).unwrap_or(0); - let last_match_token_position = matches.last().map(|m| m.token_position).unwrap_or(0); + fn crop_bounds(&self, tokens: &[Token<'_>], matches: &[Match], crop_size: usize) -> [usize; 2] { + let ( + mut remaining_words, + is_iterating_forward, + before_tokens_starting_index, + after_tokens_starting_index, + ) = if !matches.is_empty() { + let [matches_first, matches_last] = + best_match_interval::find_best_match_interval(matches, crop_size); - // matches needs to be counted in the crop len. 
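The old line below computed `remaining_words = crop_size + first_match_word_position - last_match_word_position`; the replacement in the `+` lines that follow distinguishes two cases instead: grow the window by `crop_size - matches_size` words when the best interval fits, or, when a single oversized phrase exceeds the window, iterate backwards to trim `matches_size - crop_size` words. A compact restatement of that branch with toy numbers; this mirrors the hunk rather than calling milli's code:

```rust
// Returns (word budget, is_iterating_forward) as in the new `crop_bounds`.
fn remaining(crop_size: usize, first_word: usize, last_word: usize) -> (usize, bool) {
    let matches_size = last_word - first_word + 1;
    if crop_size >= matches_size {
        // grow the window: this many extra words fit around the matches
        (crop_size - matches_size, true)
    } else {
        // shrink: the single oversized match exceeds the window by this much
        (matches_size - crop_size, false)
    }
}

fn main() {
    assert_eq!(remaining(10, 4, 6), (7, true));   // 3 matched words, 7 to add
    assert_eq!(remaining(10, 0, 11), (2, false)); // 12-word phrase, trim 2
}
```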
- let mut remaining_words = crop_size + first_match_word_position - last_match_word_position; + let matches_size = + matches_last.get_last_word_pos() - matches_first.get_first_word_pos() + 1; + + let is_crop_size_gte_match_size = crop_size >= matches_size; + let is_iterating_forward = matches_size == 0 || is_crop_size_gte_match_size; + + let remaining_words = if is_crop_size_gte_match_size { + crop_size - matches_size + } else { + // in case matches size is greater than crop size, which implies there's only one match, + // we count words backwards, because we have to remove words, as they're extra words outside of + // crop window + matches_size - crop_size + }; + + let after_tokens_starting_index = if matches_size == 0 { + 0 + } else { + let last_match_last_token_position_plus_one = matches_last.get_last_token_pos() + 1; + if last_match_last_token_position_plus_one < tokens.len() { + last_match_last_token_position_plus_one + } else { + // we have matched the end of possible tokens, there's nothing to advance + tokens.len() - 1 + } + }; + + ( + remaining_words, + is_iterating_forward, + if is_iterating_forward { matches_first.get_first_token_pos() } else { 0 }, + after_tokens_starting_index, + ) + } else { + (crop_size, true, 0, 0) + }; // create the initial state of the crop window: 2 iterators starting from the matches positions, // a reverse iterator starting from the first match token position and going towards the beginning of the text, - let mut before_tokens = tokens[..first_match_token_position].iter().rev().peekable(); - // an iterator starting from the last match token position and going towards the end of the text. - let mut after_tokens = tokens[last_match_token_position..].iter().peekable(); + let mut before_tokens = tokens[..before_tokens_starting_index].iter().rev().peekable(); + // an iterator ... + let mut after_tokens = if is_iterating_forward { + // ... starting from the last match token position and going towards the end of the text. + Either::Left(tokens[after_tokens_starting_index..].iter().peekable()) + } else { + // ... starting from the last match token position and going towards the start of the text. + Either::Right(tokens[..=after_tokens_starting_index].iter().rev().peekable()) + }; // grows the crop window peeking in both directions // until the window contains the good number of words: while remaining_words > 0 { - let before_token = before_tokens.peek().map(|t| t.separator_kind()); - let after_token = after_tokens.peek().map(|t| t.separator_kind()); + let before_token_kind = before_tokens.peek().map(SimpleTokenKind::new); + let after_token_kind = + after_tokens.as_mut().either(|v| v.peek(), |v| v.peek()).map(SimpleTokenKind::new); - match (before_token, after_token) { + match (before_token_kind, after_token_kind) { // we can expand both sides. - (Some(before_token), Some(after_token)) => { - match (before_token, after_token) { + (Some(before_token_kind), Some(after_token_kind)) => { + match (before_token_kind, after_token_kind) { // if they are both separators and are the same kind then advance both, // or expand in the soft separator separator side. - (Some(before_token_kind), Some(after_token_kind)) => { - if before_token_kind == after_token_kind { + ( + SimpleTokenKind::Separator(before_token_separator_kind), + SimpleTokenKind::Separator(after_token_separator_kind), + ) => { + if before_token_separator_kind == after_token_separator_kind { before_tokens.next(); // this avoid having an ending separator before crop marker. 
if remaining_words > 1 { after_tokens.next(); } - } else if before_token_kind == SeparatorKind::Hard { + } else if matches!(before_token_separator_kind, SeparatorKind::Hard) { after_tokens.next(); } else { before_tokens.next(); @@ -300,17 +326,17 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { } // if one of the tokens is a word, we expend in the side of the word. // left is a word, advance left. - (None, Some(_)) => { + (SimpleTokenKind::NotSeparator, SimpleTokenKind::Separator(_)) => { before_tokens.next(); remaining_words -= 1; } // right is a word, advance right. - (Some(_), None) => { + (SimpleTokenKind::Separator(_), SimpleTokenKind::NotSeparator) => { after_tokens.next(); remaining_words -= 1; } // both are words, advance left then right if remaining_word > 0. - (None, None) => { + (SimpleTokenKind::NotSeparator, SimpleTokenKind::NotSeparator) => { before_tokens.next(); remaining_words -= 1; @@ -322,16 +348,16 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { } } // the end of the text is reached, advance left. - (Some(before_token), None) => { + (Some(before_token_kind), None) => { before_tokens.next(); - if before_token.is_none() { + if matches!(before_token_kind, SimpleTokenKind::NotSeparator) { remaining_words -= 1; } } // the start of the text is reached, advance right. - (None, Some(after_token)) => { + (None, Some(after_token_kind)) => { after_tokens.next(); - if after_token.is_none() { + if matches!(after_token_kind, SimpleTokenKind::NotSeparator) { remaining_words -= 1; } } @@ -344,86 +370,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end); let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start); - (crop_byte_start, crop_byte_end) - } - - /// Compute the score of a match interval: - /// 1) count unique matches - /// 2) calculate distance between matches - /// 3) count ordered matches - fn match_interval_score(&self, matches: &[Match]) -> (i16, i16, i16) { - let mut ids: Vec<WordId> = Vec::with_capacity(matches.len()); - let mut order_score = 0; - let mut distance_score = 0; - - let mut iter = matches.iter().peekable(); - while let Some(m) = iter.next() { - if let Some(next_match) = iter.peek() { - // if matches are ordered - if next_match.ids.iter().min() > m.ids.iter().min() { - order_score += 1; - } - - // compute distance between matches - distance_score -= (next_match.word_position - m.word_position).min(7) as i16; - } - - ids.extend(m.ids.iter()); - } - - ids.sort_unstable(); - ids.dedup(); - let uniq_score = ids.len() as i16; - - // rank by unique match count, then by distance between matches, then by ordered match count. - (uniq_score, distance_score, order_score) - } - - /// Returns the matches interval where the score computed by match_interval_score is the best. - fn find_best_match_interval<'a>(&self, matches: &'a [Match], crop_size: usize) -> &'a [Match] { - // we compute the matches interval if we have at least 2 matches. - if matches.len() > 1 { - // positions of the first and the last match of the best matches interval in `matches`. - let mut best_interval = (0, 0); - let mut best_interval_score = self.match_interval_score(&matches[0..=0]); - // current interval positions.
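`SimpleTokenKind` is introduced by this PR in a new `simple_token_kind.rs` module that this section of the diff does not show, so the following is an inferred reconstruction based on the match arms above; `Token` and `SeparatorKind` are toy stand-ins for charabia's types, and the exact shape of the real module is an assumption:

```rust
// Hedged reconstruction of the `simple_token_kind` module used above.
#[derive(Clone, Copy, PartialEq)]
enum SeparatorKind {
    Hard,
    Soft,
}

struct Token {
    separator: Option<SeparatorKind>,
}

enum SimpleTokenKind {
    Separator(SeparatorKind),
    NotSeparator,
}

impl SimpleTokenKind {
    // `peek()` on an iterator of `&Token` yields `&&Token`, hence the double reference,
    // matching the `.map(SimpleTokenKind::new)` calls in the hunk above.
    fn new(token: &&Token) -> Self {
        match token.separator {
            Some(kind) => Self::Separator(kind),
            None => Self::NotSeparator,
        }
    }
}

fn main() {
    let tokens = [Token { separator: None }, Token { separator: Some(SeparatorKind::Hard) }];
    let mut iter = tokens.iter().peekable();
    let kind = iter.peek().map(SimpleTokenKind::new);
    assert!(matches!(kind, Some(SimpleTokenKind::NotSeparator)));
}
```

Replacing the earlier `Option<Option<SeparatorKind>>` peeks with this enum makes the word-versus-separator intent of each match arm explicit.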
-            let mut interval_first = 0;
-            let mut interval_last = 0;
-            for (index, next_match) in matches.iter().enumerate().skip(1) {
-                // if the next match would make the interval grow to more than crop_size,
-                // we compare the current interval with the best one,
-                // then we increase `interval_first` until the next match can be added.
-                if next_match.word_position - matches[interval_first].word_position >= crop_size {
-                    let interval_score =
-                        self.match_interval_score(&matches[interval_first..=interval_last]);
-
-                    // keep interval if it's the best
-                    if interval_score > best_interval_score {
-                        best_interval = (interval_first, interval_last);
-                        best_interval_score = interval_score;
-                    }
-
-                    // advance start of the interval while interval is longer than crop_size.
-                    while next_match.word_position - matches[interval_first].word_position
-                        >= crop_size
-                    {
-                        interval_first += 1;
-                    }
-                }
-                interval_last = index;
-            }
-
-            // compute the last interval score and compare it to the best one.
-            let interval_score =
-                self.match_interval_score(&matches[interval_first..=interval_last]);
-            if interval_score > best_interval_score {
-                best_interval = (interval_first, interval_last);
-            }
-
-            &matches[best_interval.0..=best_interval.1]
-        } else {
-            matches
-        }
+        [crop_byte_start, crop_byte_end]
     }

     // Returns the formatted version of the original text.
@@ -434,69 +381,87 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
         } else {
             match &self.matches {
                 Some((tokens, matches)) => {
-                    // If the text has to be cropped,
-                    // crop around the best interval.
-                    let (byte_start, byte_end) = match format_options.crop {
+                    // If the text has to be cropped, crop around the best interval.
+                    let [crop_byte_start, crop_byte_end] = match format_options.crop {
                         Some(crop_size) if crop_size > 0 => {
-                            let matches = self.find_best_match_interval(matches, crop_size);
                             self.crop_bounds(tokens, matches, crop_size)
                         }
-                        _ => (0, self.text.len()),
+                        _ => [0, self.text.len()],
                    };

                    let mut formatted = Vec::new();

                    // push crop marker if it's not the start of the text.
-                    if byte_start > 0 && !self.crop_marker.is_empty() {
+                    if crop_byte_start > 0 && !self.crop_marker.is_empty() {
                        formatted.push(self.crop_marker);
                    }

-                    let mut byte_index = byte_start;
+                    let mut byte_index = crop_byte_start;

                    if format_options.highlight {
                        // insert highlight markers around matches.
                        for m in matches {
-                            let token = &tokens[m.token_position];
+                            let [m_byte_start, m_byte_end] = match m.position {
+                                MatchPosition::Word { token_position, .. } => {
+                                    let token = &tokens[token_position];
+                                    [&token.byte_start, &token.byte_end]
+                                }
+                                MatchPosition::Phrase { token_positions: [ftp, ltp], .. } => {
+                                    [&tokens[ftp].byte_start, &tokens[ltp].byte_end]
+                                }
+                            };

-                            // skip matches out of the crop window.
-                            if token.byte_start < byte_start || token.byte_end > byte_end {
+                            // skip matches out of the crop window
+                            if *m_byte_end < crop_byte_start || *m_byte_start > crop_byte_end {
                                continue;
                            }

-                            if byte_index < token.byte_start {
-                                formatted.push(&self.text[byte_index..token.byte_start]);
+                            // adjust start and end to the crop window size
+                            let [m_byte_start, m_byte_end] = [
+                                max(m_byte_start, &crop_byte_start),
+                                min(m_byte_end, &crop_byte_end),
+                            ];
+
+                            // push text that is positioned before our matches
+                            if byte_index < *m_byte_start {
+                                formatted.push(&self.text[byte_index..*m_byte_start]);
                            }

-                            let highlight_byte_index = self.text[token.byte_start..]
-                                .char_indices()
-                                .enumerate()
-                                .find(|(i, _)| *i == m.match_len)
-                                .map_or(token.byte_end, |(_, (i, _))| i + token.byte_start);

                             formatted.push(self.highlight_prefix);
-                            formatted.push(&self.text[token.byte_start..highlight_byte_index]);
+
+                            // TODO: This is extra work; charabia::token::Token's byte_len
+                            // should already give us the original byte length, but it doesn't
+                            // work as intended; investigate why
+                            let highlight_byte_index = self.text[*m_byte_start..]
+                                .char_indices()
+                                .nth(m.char_count)
+                                .map_or(*m_byte_end, |(i, _)| min(i + *m_byte_start, *m_byte_end));
+                            formatted.push(&self.text[*m_byte_start..highlight_byte_index]);
+
+                            formatted.push(self.highlight_suffix);
+
                             // if it's a prefix highlight, we put the end of the word after the highlight marker.
-                            if highlight_byte_index < token.byte_end {
-                                formatted.push(&self.text[highlight_byte_index..token.byte_end]);
+                            if highlight_byte_index < *m_byte_end {
+                                formatted.push(&self.text[highlight_byte_index..*m_byte_end]);
                             }

-                            byte_index = token.byte_end;
+                            byte_index = *m_byte_end;
                         }
                     }

                     // push the rest of the text between last match and the end of crop.
-                    if byte_index < byte_end {
-                        formatted.push(&self.text[byte_index..byte_end]);
+                    if byte_index < crop_byte_end {
+                        formatted.push(&self.text[byte_index..crop_byte_end]);
                     }

                     // push crop marker if it's not the end of the text.
-                    if byte_end < self.text.len() && !self.crop_marker.is_empty() {
+                    if crop_byte_end < self.text.len() && !self.crop_marker.is_empty() {
                         formatted.push(self.crop_marker);
                     }

                     if formatted.len() == 1 {
                         // avoid concatenating if there is already 1 slice.
-                        Cow::Borrowed(&self.text[byte_start..byte_end])
+                        Cow::Borrowed(&self.text[crop_byte_start..crop_byte_end])
                     } else {
                         Cow::Owned(formatted.concat())
                     }
@@ -821,22 +786,24 @@ mod tests {
     fn format_highlight_crop_phrase_query() {
         //! testing: https://github.com/meilisearch/meilisearch/issues/3975
         let temp_index = TempIndex::new();
+
+        let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
         temp_index
             .add_documents(documents!([
-                { "id": 1, "text": "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!" }
+                { "id": 1, "text": text }
             ]))
             .unwrap();
+
         let rtxn = temp_index.read_txn().unwrap();

         let format_options = FormatOptions { highlight: true, crop: Some(10) };
-        let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";

         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
         let mut matcher = builder.build(text, None);
         // should return 10 words with a marker at the start as well as the end, and the highlighted matches.
         insta::assert_snapshot!(
             matcher.format(format_options),
-            @"…had the power to split the world between those who…"
+            @"…the power to split the world between those who embraced…"
         );

         let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
@@ -844,7 +811,63 @@ mod tests {
         // should highlight "those" and the phrase "and those".
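        // (editor's note, not part of the patch) a phrase match now carries a single
        // MatchPosition::Phrase spanning its first and last token positions, so the
        // formatter wraps the whole phrase in one pair of highlight markers instead
        // of one pair per word, as the `_do or_` and `isn'_t he_` snapshot below shows.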
        insta::assert_snapshot!(
            matcher.format(format_options),
-            @"…world between those who embraced progress and those who resisted…"
+            @"…world between those who embraced progress and those who resisted…"
+        );
+
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"The groundbreaking invention had the power to split the world\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"The groundbreaking invention had the power to split the world…"
+        );
+
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"The groundbreaking invention had the power to split the world between those\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"The groundbreaking invention had the power to split the world…"
+        );
+
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            // TODO: Should include the exclamation mark without crop markers
+            @"…between those who embraced progress and those who resisted change…"
+        );
+
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"groundbreaking invention\" \"split the world between\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…groundbreaking invention had the power to split the world between…"
+        );
+
+        let builder = MatcherBuilder::new_test(
+            &rtxn,
+            &temp_index,
+            "\"groundbreaking invention\" \"had the power to split the world between those\"",
+        );
+        let mut matcher = builder.build(text, None);
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…invention had the power to split the world between those…"
         );
     }

@@ -900,7 +923,7 @@ mod tests {
         let mut matcher = builder.build(text, None);
         insta::assert_snapshot!(
             matcher.format(format_options),
-            @"_the_ _do_ _or_ die can't be he do and or isn'_t_ _he_"
+            @"_the_ _do or_ die can't be he do and or isn'_t he_"
         );
     }
 }
diff --git a/crates/milli/src/search/new/matches/simple_token_kind.rs b/crates/milli/src/search/new/matches/simple_token_kind.rs
new file mode 100644
index 000000000..b34a8c985
--- /dev/null
+++ b/crates/milli/src/search/new/matches/simple_token_kind.rs
@@ -0,0 +1,15 @@
+use charabia::{SeparatorKind, Token, TokenKind};
+
+pub enum SimpleTokenKind {
+    Separator(SeparatorKind),
+    NotSeparator,
+}
+
+impl SimpleTokenKind {
+    pub fn new(token: &&Token<'_>) -> Self {
+        match token.kind {
+            TokenKind::Separator(separator_kind) => Self::Separator(separator_kind),
+            _ => Self::NotSeparator,
+        }
+    }
+}
diff --git a/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs
similarity index 100%
rename from milli/src/search/new/mod.rs
rename to crates/milli/src/search/new/mod.rs
diff --git a/milli/src/search/new/query_graph.rs b/crates/milli/src/search/new/query_graph.rs
similarity index 100%
rename from milli/src/search/new/query_graph.rs
rename to crates/milli/src/search/new/query_graph.rs
diff --git a/milli/src/search/new/query_term/compute_derivations.rs b/crates/milli/src/search/new/query_term/compute_derivations.rs
similarity index 100%
rename from milli/src/search/new/query_term/compute_derivations.rs
rename to crates/milli/src/search/new/query_term/compute_derivations.rs
diff --git a/milli/src/search/new/query_term/mod.rs
b/crates/milli/src/search/new/query_term/mod.rs similarity index 100% rename from milli/src/search/new/query_term/mod.rs rename to crates/milli/src/search/new/query_term/mod.rs diff --git a/milli/src/search/new/query_term/ntypo_subset.rs b/crates/milli/src/search/new/query_term/ntypo_subset.rs similarity index 100% rename from milli/src/search/new/query_term/ntypo_subset.rs rename to crates/milli/src/search/new/query_term/ntypo_subset.rs diff --git a/milli/src/search/new/query_term/parse_query.rs b/crates/milli/src/search/new/query_term/parse_query.rs similarity index 100% rename from milli/src/search/new/query_term/parse_query.rs rename to crates/milli/src/search/new/query_term/parse_query.rs diff --git a/milli/src/search/new/query_term/phrase.rs b/crates/milli/src/search/new/query_term/phrase.rs similarity index 100% rename from milli/src/search/new/query_term/phrase.rs rename to crates/milli/src/search/new/query_term/phrase.rs diff --git a/milli/src/search/new/ranking_rule_graph/build.rs b/crates/milli/src/search/new/ranking_rule_graph/build.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/build.rs rename to crates/milli/src/search/new/ranking_rule_graph/build.rs diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/crates/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/cheapest_paths.rs rename to crates/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs diff --git a/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs b/crates/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs rename to crates/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs diff --git a/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs b/crates/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs rename to crates/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs diff --git a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/exactness/mod.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/exactness/mod.rs rename to crates/milli/src/search/new/ranking_rule_graph/exactness/mod.rs diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/fid/mod.rs rename to crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/mod.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/mod.rs rename to crates/milli/src/search/new/ranking_rule_graph/mod.rs diff --git a/milli/src/search/new/ranking_rule_graph/position/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/position/mod.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/position/mod.rs rename to crates/milli/src/search/new/ranking_rule_graph/position/mod.rs diff --git a/milli/src/search/new/ranking_rule_graph/proximity/build.rs b/crates/milli/src/search/new/ranking_rule_graph/proximity/build.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/proximity/build.rs rename to 
crates/milli/src/search/new/ranking_rule_graph/proximity/build.rs diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/crates/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs rename to crates/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/proximity/mod.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/proximity/mod.rs rename to crates/milli/src/search/new/ranking_rule_graph/proximity/mod.rs diff --git a/milli/src/search/new/ranking_rule_graph/typo/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/typo/mod.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/typo/mod.rs rename to crates/milli/src/search/new/ranking_rule_graph/typo/mod.rs diff --git a/milli/src/search/new/ranking_rule_graph/words/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/words/mod.rs similarity index 100% rename from milli/src/search/new/ranking_rule_graph/words/mod.rs rename to crates/milli/src/search/new/ranking_rule_graph/words/mod.rs diff --git a/milli/src/search/new/ranking_rules.rs b/crates/milli/src/search/new/ranking_rules.rs similarity index 100% rename from milli/src/search/new/ranking_rules.rs rename to crates/milli/src/search/new/ranking_rules.rs diff --git a/milli/src/search/new/resolve_query_graph.rs b/crates/milli/src/search/new/resolve_query_graph.rs similarity index 100% rename from milli/src/search/new/resolve_query_graph.rs rename to crates/milli/src/search/new/resolve_query_graph.rs diff --git a/milli/src/search/new/small_bitmap.rs b/crates/milli/src/search/new/small_bitmap.rs similarity index 100% rename from milli/src/search/new/small_bitmap.rs rename to crates/milli/src/search/new/small_bitmap.rs diff --git a/milli/src/search/new/sort.rs b/crates/milli/src/search/new/sort.rs similarity index 100% rename from milli/src/search/new/sort.rs rename to crates/milli/src/search/new/sort.rs diff --git a/milli/src/search/new/tests/attribute_fid.rs b/crates/milli/src/search/new/tests/attribute_fid.rs similarity index 100% rename from milli/src/search/new/tests/attribute_fid.rs rename to crates/milli/src/search/new/tests/attribute_fid.rs diff --git a/milli/src/search/new/tests/attribute_position.rs b/crates/milli/src/search/new/tests/attribute_position.rs similarity index 100% rename from milli/src/search/new/tests/attribute_position.rs rename to crates/milli/src/search/new/tests/attribute_position.rs diff --git a/milli/src/search/new/tests/cutoff.rs b/crates/milli/src/search/new/tests/cutoff.rs similarity index 100% rename from milli/src/search/new/tests/cutoff.rs rename to crates/milli/src/search/new/tests/cutoff.rs diff --git a/milli/src/search/new/tests/distinct.rs b/crates/milli/src/search/new/tests/distinct.rs similarity index 100% rename from milli/src/search/new/tests/distinct.rs rename to crates/milli/src/search/new/tests/distinct.rs diff --git a/milli/src/search/new/tests/exactness.rs b/crates/milli/src/search/new/tests/exactness.rs similarity index 100% rename from milli/src/search/new/tests/exactness.rs rename to crates/milli/src/search/new/tests/exactness.rs diff --git a/milli/src/search/new/tests/geo_sort.rs b/crates/milli/src/search/new/tests/geo_sort.rs similarity index 100% rename from milli/src/search/new/tests/geo_sort.rs rename to 
crates/milli/src/search/new/tests/geo_sort.rs diff --git a/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs similarity index 100% rename from milli/src/search/new/tests/integration.rs rename to crates/milli/src/search/new/tests/integration.rs diff --git a/milli/src/search/new/tests/language.rs b/crates/milli/src/search/new/tests/language.rs similarity index 100% rename from milli/src/search/new/tests/language.rs rename to crates/milli/src/search/new/tests/language.rs diff --git a/milli/src/search/new/tests/mod.rs b/crates/milli/src/search/new/tests/mod.rs similarity index 100% rename from milli/src/search/new/tests/mod.rs rename to crates/milli/src/search/new/tests/mod.rs diff --git a/milli/src/search/new/tests/ngram_split_words.rs b/crates/milli/src/search/new/tests/ngram_split_words.rs similarity index 100% rename from milli/src/search/new/tests/ngram_split_words.rs rename to crates/milli/src/search/new/tests/ngram_split_words.rs diff --git a/milli/src/search/new/tests/proximity.rs b/crates/milli/src/search/new/tests/proximity.rs similarity index 100% rename from milli/src/search/new/tests/proximity.rs rename to crates/milli/src/search/new/tests/proximity.rs diff --git a/milli/src/search/new/tests/proximity_typo.rs b/crates/milli/src/search/new/tests/proximity_typo.rs similarity index 100% rename from milli/src/search/new/tests/proximity_typo.rs rename to crates/milli/src/search/new/tests/proximity_typo.rs diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_simple.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_simple.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_simple.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_simple.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap rename to 
crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_after_words.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_after_words.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_after_words.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_after_words.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_all_candidates_with_typo.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_all_candidates_with_typo.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_all_candidates_with_typo.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_all_candidates_with_typo.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase-3.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase-3.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase-3.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase-3.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_phrase.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_simple.snap 
b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_simple.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_simple.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_attribute_starts_with_simple.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_followed_by_typo_prefer_no_typo_prefix.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_followed_by_typo_prefer_no_typo_prefix.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_followed_by_typo_prefer_no_typo_prefix.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_followed_by_typo_prefer_no_typo_prefix.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_ordered.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_ordered.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_ordered.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_ordered.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_random.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_random.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_random.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_random.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed-3.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed-3.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed-3.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed-3.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__exactness_simple_reversed.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness-4.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness-4.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness-4.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness-4.snap diff --git 
a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__proximity_after_exactness.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__typo_followed_by_exactness.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__typo_followed_by_exactness.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__typo_followed_by_exactness.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__typo_followed_by_exactness.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__words_after_exactness.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__words_after_exactness.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__words_after_exactness.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__exactness__words_after_exactness.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-4.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-4.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-4.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort-4.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-10.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-10.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-10.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-10.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-12.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-12.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-12.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-12.snap diff --git 
a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-14.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-14.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-14.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-14.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-16.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-16.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-16.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-16.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-18.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-18.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-18.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-18.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-20.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-20.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-20.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-20.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-4.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-4.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-4.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-4.snap diff --git 
a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-6.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-6.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-6.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-6.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-8.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-8.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-8.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_around_the_edge_of_the_flat_earth-8.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_mixed_with_words-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_mixed_with_words-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_mixed_with_words-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_mixed_with_words-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_without_any_geo_faceted_documents-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_without_any_geo_faceted_documents-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_without_any_geo_faceted_documents-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_without_any_geo_faceted_documents-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-11.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-11.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-11.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-11.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-14.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-14.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-14.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-14.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-2.snap rename to 
crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-5.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-5.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-5.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-5.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-8.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-8.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-8.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_prefix_db-8.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-5.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-5.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-5.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-5.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-8.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-8.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-8.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__proximity__proximity_split_word-8.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__redacted-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__redacted-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__redacted-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__redacted-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-11.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-11.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-11.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-11.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-2.snap similarity index 
100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-5.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-5.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-5.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-5.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-8.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-8.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-8.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__sort__sort-8.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-6.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-6.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-6.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-6.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-8.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-8.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-8.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__stop_words__stop_words_in_phrase-8.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-5.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-5.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-5.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-5.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-8.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-8.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-8.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_bucketing-8.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_attribute-4.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_attribute-4.snap similarity index 100% rename from 
milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_attribute-4.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_attribute-4.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_word-12.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_word-12.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_word-12.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_exact_word-12.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-5.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-5.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-5.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_ranking_rule_not_preceded_by_words_ranking_rule-5.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-5.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-5.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-5.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo__typo_synonyms-5.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_basic_and_complex1-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_basic_and_complex1-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_basic_and_complex1-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_basic_and_complex1-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_complex2-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_complex2-2.snap similarity index 100% rename from 
milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_complex2-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__typo_proximity__trap_complex2-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-5.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-5.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-5.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_phrase-5.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-5.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-5.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-5.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_proximity_tms_last_simple-5.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_all-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_all-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_all-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_all-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-2.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-5.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-5.snap similarity index 100% rename from 
milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-5.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_phrase-5.snap diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_simple-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_simple-2.snap similarity index 100% rename from milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_simple-2.snap rename to crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__words_tms__words_tms_last_simple-2.snap diff --git a/milli/src/search/new/tests/sort.rs b/crates/milli/src/search/new/tests/sort.rs similarity index 100% rename from milli/src/search/new/tests/sort.rs rename to crates/milli/src/search/new/tests/sort.rs diff --git a/milli/src/search/new/tests/stop_words.rs b/crates/milli/src/search/new/tests/stop_words.rs similarity index 100% rename from milli/src/search/new/tests/stop_words.rs rename to crates/milli/src/search/new/tests/stop_words.rs diff --git a/milli/src/search/new/tests/typo.rs b/crates/milli/src/search/new/tests/typo.rs similarity index 100% rename from milli/src/search/new/tests/typo.rs rename to crates/milli/src/search/new/tests/typo.rs diff --git a/milli/src/search/new/tests/typo_proximity.rs b/crates/milli/src/search/new/tests/typo_proximity.rs similarity index 100% rename from milli/src/search/new/tests/typo_proximity.rs rename to crates/milli/src/search/new/tests/typo_proximity.rs diff --git a/milli/src/search/new/tests/words_tms.rs b/crates/milli/src/search/new/tests/words_tms.rs similarity index 100% rename from milli/src/search/new/tests/words_tms.rs rename to crates/milli/src/search/new/tests/words_tms.rs diff --git a/milli/src/search/new/vector_sort.rs b/crates/milli/src/search/new/vector_sort.rs similarity index 90% rename from milli/src/search/new/vector_sort.rs rename to crates/milli/src/search/new/vector_sort.rs index de1dacbe7..90377c09c 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/crates/milli/src/search/new/vector_sort.rs @@ -1,11 +1,10 @@ use std::iter::FromIterator; -use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; use crate::score_details::{self, ScoreDetails}; -use crate::vector::{DistributionShift, Embedder}; +use crate::vector::{ArroyWrapper, DistributionShift, Embedder}; use crate::{DocumentId, Result, SearchContext, SearchLogger}; pub struct VectorSort { @@ -53,14 +52,9 @@ impl VectorSort { vector_candidates: &RoaringBitmap, ) -> Result<()> { let target = &self.target; - let mut results = Vec::new(); - for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) { - let nns_by_vector = - reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; - results.extend(nns_by_vector.into_iter()); - } - results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); + let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized); + let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; self.cached_sorted_docids = results.into_iter(); Ok(()) diff --git a/milli/src/search/similar.rs b/crates/milli/src/search/similar.rs similarity index 86% rename from milli/src/search/similar.rs rename to crates/milli/src/search/similar.rs index 
0cb8d723d..5547d800e 100644 --- a/milli/src/search/similar.rs +++ b/crates/milli/src/search/similar.rs @@ -1,10 +1,9 @@ use std::sync::Arc; -use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use crate::score_details::{self, ScoreDetails}; -use crate::vector::Embedder; +use crate::vector::{ArroyWrapper, Embedder}; use crate::{filtered_universe, DocumentId, Filter, Index, Result, SearchResult}; pub struct Similar<'a> { @@ -71,23 +70,13 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? .ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let mut results = Vec::new(); - - for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) { - let nns_by_item = reader?.nns_by_item( - self.rtxn, - self.id, - self.limit + self.offset + 1, - Some(&universe), - )?; - if let Some(mut nns_by_item) = nns_by_item { - results.append(&mut nns_by_item); - } else { - break; - } - } - - results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); + let reader = ArroyWrapper::new(self.index.vector_arroy, embedder_index, self.quantized); + let results = reader.nns_by_item( + self.rtxn, + self.id, + self.limit + self.offset + 1, + Some(&universe), + )?; let mut documents_ids = Vec::with_capacity(self.limit); let mut document_scores = Vec::with_capacity(self.limit); diff --git a/milli/src/snapshot_tests.rs b/crates/milli/src/snapshot_tests.rs similarity index 100% rename from milli/src/snapshot_tests.rs rename to crates/milli/src/snapshot_tests.rs diff --git a/milli/src/snapshots/index.rs/bug_3007/geo_faceted_documents_ids.snap b/crates/milli/src/snapshots/index.rs/bug_3007/geo_faceted_documents_ids.snap similarity index 100% rename from milli/src/snapshots/index.rs/bug_3007/geo_faceted_documents_ids.snap rename to crates/milli/src/snapshots/index.rs/bug_3007/geo_faceted_documents_ids.snap diff --git a/milli/src/snapshots/index.rs/unexpected_extra_fields_in_geo_field/geo_faceted_documents_ids.snap b/crates/milli/src/snapshots/index.rs/unexpected_extra_fields_in_geo_field/geo_faceted_documents_ids.snap similarity index 100% rename from milli/src/snapshots/index.rs/unexpected_extra_fields_in_geo_field/geo_faceted_documents_ids.snap rename to crates/milli/src/snapshots/index.rs/unexpected_extra_fields_in_geo_field/geo_faceted_documents_ids.snap diff --git a/milli/src/thread_pool_no_abort.rs b/crates/milli/src/thread_pool_no_abort.rs similarity index 100% rename from milli/src/thread_pool_no_abort.rs rename to crates/milli/src/thread_pool_no_abort.rs diff --git a/crates/milli/src/update/available_documents_ids.rs b/crates/milli/src/update/available_documents_ids.rs new file mode 100644 index 000000000..e69de29bb diff --git a/milli/src/update/available_ids.rs b/crates/milli/src/update/available_ids.rs similarity index 100% rename from milli/src/update/available_ids.rs rename to crates/milli/src/update/available_ids.rs diff --git a/milli/src/update/clear_documents.rs b/crates/milli/src/update/clear_documents.rs similarity index 100% rename from milli/src/update/clear_documents.rs rename to crates/milli/src/update/clear_documents.rs diff --git a/milli/src/update/concurrent_available_ids.rs b/crates/milli/src/update/concurrent_available_ids.rs similarity index 100% rename from milli/src/update/concurrent_available_ids.rs rename to crates/milli/src/update/concurrent_available_ids.rs diff --git a/milli/src/update/del_add.rs b/crates/milli/src/update/del_add.rs similarity index 100% rename from milli/src/update/del_add.rs rename to 
crates/milli/src/update/del_add.rs diff --git a/milli/src/update/facet/bulk.rs b/crates/milli/src/update/facet/bulk.rs similarity index 100% rename from milli/src/update/facet/bulk.rs rename to crates/milli/src/update/facet/bulk.rs diff --git a/milli/src/update/facet/incremental.rs b/crates/milli/src/update/facet/incremental.rs similarity index 100% rename from milli/src/update/facet/incremental.rs rename to crates/milli/src/update/facet/incremental.rs diff --git a/milli/src/update/facet/mod.rs b/crates/milli/src/update/facet/mod.rs similarity index 100% rename from milli/src/update/facet/mod.rs rename to crates/milli/src/update/facet/mod.rs diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert/default.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert/default.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert/default.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert/default.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert/large_group_small_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert/large_group_small_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert/large_group_small_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert/large_group_small_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert/odd_group_odd_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert/odd_group_odd_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert/odd_group_odd_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert/odd_group_odd_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert/small_group_large_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert/small_group_large_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert/small_group_large_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert/small_group_large_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert/small_group_small_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert/small_group_small_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert/small_group_small_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert/small_group_small_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/default.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/default.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/default.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/default.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/large_group_small_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/large_group_small_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/large_group_small_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/large_group_small_min_level.hash.snap 
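The vector_sort.rs and similar.rs hunks above capture the heart of this refactor: instead of looping over one arroy reader per sub-index, extending a scratch vector, and re-sorting by hand with OrderedFloat, each call site now asks a single ArroyWrapper for an already-sorted answer. A minimal, self-contained sketch of that consolidation pattern follows; ShardReader and WrappedReader are hypothetical stand-ins for the arroy plumbing, not milli APIs.

```rust
// Sketch only, under the assumption that each embedder's vectors live in
// several shards: the wrapper merges and sorts internally, so callers no
// longer loop over shards or sort by hand.

struct ShardReader {
    // (document id, distance) pairs, unsorted across shards.
    data: Vec<(u32, f32)>,
}

struct WrappedReader {
    shards: Vec<ShardReader>,
}

impl WrappedReader {
    // One call returns the `limit` nearest entries across every shard,
    // already sorted by distance, mirroring the single-call shape that
    // the hunks above give to `ArroyWrapper::nns_by_vector`.
    fn nns(&self, limit: usize) -> Vec<(u32, f32)> {
        let mut all: Vec<(u32, f32)> =
            self.shards.iter().flat_map(|s| s.data.iter().copied()).collect();
        // f32 is not Ord, so use total_cmp instead of an OrderedFloat wrapper.
        all.sort_unstable_by(|a, b| a.1.total_cmp(&b.1));
        all.truncate(limit);
        all
    }
}

fn main() {
    let reader = WrappedReader {
        shards: vec![
            ShardReader { data: vec![(1, 0.9), (2, 0.1)] },
            ShardReader { data: vec![(3, 0.5)] },
        ],
    };
    assert_eq!(reader.nns(2), vec![(2, 0.1), (3, 0.5)]);
}
```

Folding the merge-and-sort into the wrapper is also what lets both hunks drop the ordered_float import: f32 ordering is handled once, inside the wrapper, rather than at every call site.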
diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/odd_group_odd_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/odd_group_odd_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/odd_group_odd_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/odd_group_odd_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/small_group_large_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/small_group_large_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/small_group_large_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/small_group_large_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/small_group_small_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/small_group_small_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/small_group_small_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert_delete_field_insert/small_group_small_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_string/default.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/default.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_string/default.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/default.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_string/large_group_small_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/large_group_small_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_string/large_group_small_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/large_group_small_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_string/odd_group_odd_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/odd_group_odd_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_string/odd_group_odd_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/odd_group_odd_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_string/small_group_large_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/small_group_large_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_string/small_group_large_min_level.hash.snap rename to crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/small_group_large_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/bulk.rs/insert_string/small_group_small_min_level.hash.snap b/crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/small_group_small_min_level.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/bulk.rs/insert_string/small_group_small_min_level.hash.snap rename to 
crates/milli/src/update/facet/snapshots/bulk.rs/insert_string/small_group_small_min_level.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/append/append.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/append/append.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/append/append.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/append/append.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/0.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/0.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_from_end/0.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/0.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/100.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/100.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_from_end/100.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/100.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/15.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/15.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_from_end/15.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/15.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/150.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/150.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_from_end/150.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/150.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/17.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/17.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_from_end/17.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/17.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/200.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/200.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_from_end/200.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_end/200.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_from_start/127.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_start/127.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_from_start/127.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_start/127.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_from_start/215.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_start/215.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_from_start/215.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_start/215.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_from_start/255.snap 
b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_start/255.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_from_start/255.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_from_start/255.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/127.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/127.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/127.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/127.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/215.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/215.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/215.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/215.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/255.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/255.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/255.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/delete_shuffled/255.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/in_place_level0_delete/after_delete.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/in_place_level0_delete/after_delete.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/in_place_level0_delete/after_delete.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/in_place_level0_delete/after_delete.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/in_place_level0_delete/before_delete.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/in_place_level0_delete/before_delete.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/in_place_level0_delete/before_delete.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/in_place_level0_delete/before_delete.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/in_place_level0_insert/in_place_level0_insert.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/in_place_level0_insert/in_place_level0_insert.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/in_place_level0_insert/in_place_level0_insert.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/in_place_level0_insert/in_place_level0_insert.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/many_field_ids_append/many_field_ids_append.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/many_field_ids_append/many_field_ids_append.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/many_field_ids_append/many_field_ids_append.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/many_field_ids_append/many_field_ids_append.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/many_field_ids_prepend/many_field_ids_prepend.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/many_field_ids_prepend/many_field_ids_prepend.hash.snap similarity index 100% rename from 
milli/src/update/facet/snapshots/incremental.rs/many_field_ids_prepend/many_field_ids_prepend.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/many_field_ids_prepend/many_field_ids_prepend.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/merge_values/merge_values.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/merge_values/merge_values.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/merge_values/merge_values.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/merge_values/merge_values.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/prepend/prepend.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/prepend/prepend.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/prepend/prepend.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/prepend/prepend.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/shuffle_merge_string_and_delete/after_delete.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/shuffle_merge_string_and_delete/after_delete.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/shuffle_merge_string_and_delete/after_delete.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/shuffle_merge_string_and_delete/after_delete.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/shuffle_merge_string_and_delete/before_delete.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/shuffle_merge_string_and_delete/before_delete.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/shuffle_merge_string_and_delete/before_delete.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/shuffle_merge_string_and_delete/before_delete.hash.snap diff --git a/milli/src/update/facet/snapshots/incremental.rs/shuffled/shuffled.hash.snap b/crates/milli/src/update/facet/snapshots/incremental.rs/shuffled/shuffled.hash.snap similarity index 100% rename from milli/src/update/facet/snapshots/incremental.rs/shuffled/shuffled.hash.snap rename to crates/milli/src/update/facet/snapshots/incremental.rs/shuffled/shuffled.hash.snap diff --git a/milli/src/update/index_documents/enrich.rs b/crates/milli/src/update/index_documents/enrich.rs similarity index 100% rename from milli/src/update/index_documents/enrich.rs rename to crates/milli/src/update/index_documents/enrich.rs diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs similarity index 100% rename from milli/src/update/index_documents/extract/extract_docid_word_positions.rs rename to crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/crates/milli/src/update/index_documents/extract/extract_facet_number_docids.rs similarity index 100% rename from milli/src/update/index_documents/extract/extract_facet_number_docids.rs rename to crates/milli/src/update/index_documents/extract/extract_facet_number_docids.rs diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs similarity index 100% rename from 
milli/src/update/index_documents/extract/extract_facet_string_docids.rs rename to crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs similarity index 100% rename from milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs rename to crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/crates/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs similarity index 100% rename from milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs rename to crates/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/crates/milli/src/update/index_documents/extract/extract_geo_points.rs similarity index 100% rename from milli/src/update/index_documents/extract/extract_geo_points.rs rename to crates/milli/src/update/index_documents/extract/extract_geo_points.rs diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs similarity index 100% rename from milli/src/update/index_documents/extract/extract_vector_points.rs rename to crates/milli/src/update/index_documents/extract/extract_vector_points.rs diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/crates/milli/src/update/index_documents/extract/extract_word_docids.rs similarity index 100% rename from milli/src/update/index_documents/extract/extract_word_docids.rs rename to crates/milli/src/update/index_documents/extract/extract_word_docids.rs diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/crates/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs similarity index 100% rename from milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs rename to crates/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/crates/milli/src/update/index_documents/extract/extract_word_position_docids.rs similarity index 100% rename from milli/src/update/index_documents/extract/extract_word_position_docids.rs rename to crates/milli/src/update/index_documents/extract/extract_word_position_docids.rs diff --git a/milli/src/update/index_documents/extract/mod.rs b/crates/milli/src/update/index_documents/extract/mod.rs similarity index 100% rename from milli/src/update/index_documents/extract/mod.rs rename to crates/milli/src/update/index_documents/extract/mod.rs diff --git a/milli/src/update/index_documents/helpers/clonable_mmap.rs b/crates/milli/src/update/index_documents/helpers/clonable_mmap.rs similarity index 100% rename from milli/src/update/index_documents/helpers/clonable_mmap.rs rename to crates/milli/src/update/index_documents/helpers/clonable_mmap.rs diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/crates/milli/src/update/index_documents/helpers/grenad_helpers.rs similarity index 98% rename from milli/src/update/index_documents/helpers/grenad_helpers.rs rename to crates/milli/src/update/index_documents/helpers/grenad_helpers.rs index 220567208..b7da39878 
100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/crates/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -83,6 +83,8 @@ pub fn writer_into_reader( grenad::Reader::new(BufReader::new(file)).map_err(Into::into) } +/// # Safety +/// We use memory mapping inside. So, according to the Rust community, it's unsafe. pub unsafe fn as_cloneable_grenad( reader: &grenad::Reader<BufReader<File>>, ) -> Result<grenad::Reader<CursorClonableMmap>> { diff --git a/milli/src/update/index_documents/helpers/merge_functions.rs b/crates/milli/src/update/index_documents/helpers/merge_functions.rs similarity index 100% rename from milli/src/update/index_documents/helpers/merge_functions.rs rename to crates/milli/src/update/index_documents/helpers/merge_functions.rs diff --git a/milli/src/update/index_documents/helpers/mod.rs b/crates/milli/src/update/index_documents/helpers/mod.rs similarity index 100% rename from milli/src/update/index_documents/helpers/mod.rs rename to crates/milli/src/update/index_documents/helpers/mod.rs diff --git a/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs similarity index 99% rename from milli/src/update/index_documents/mod.rs rename to crates/milli/src/update/index_documents/mod.rs index 49afe9ac5..befde896d 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -683,9 +683,8 @@ where key: None, }, )?; - let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap(); let reader = - ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized); + ArroyWrapper::new(self.index.vector_arroy, index, action.was_quantized); let dim = reader.dimensions(self.wtxn)?; dimension.insert(name.to_string(), dim); } @@ -694,6 +693,7 @@ where for (embedder_name, dimension) in dimension { let wtxn = &mut *self.wtxn; let vector_arroy = self.index.vector_arroy; + let cancel = &self.should_abort; let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, @@ -707,17 +707,8 @@ where let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized); pool.install(|| { - for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { - let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized); - if is_quantizing { - writer.quantize(wtxn, k, dimension)?; - } - if writer.need_build(wtxn, dimension)? { - writer.build(wtxn, &mut rng, dimension)?; - } else if writer.is_empty(wtxn, dimension)?
{ - break; - } - } + let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized); + writer.build_and_quantize(wtxn, &mut rng, dimension, is_quantizing, cancel)?; Result::Ok(()) }) .map_err(InternalError::from)??; diff --git a/milli/src/update/index_documents/parallel.rs b/crates/milli/src/update/index_documents/parallel.rs similarity index 100% rename from milli/src/update/index_documents/parallel.rs rename to crates/milli/src/update/index_documents/parallel.rs diff --git a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/documents_ids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/documents_ids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/documents_ids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/documents_ids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/facet_id_exists_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/facet_id_exists_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/facet_id_exists_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/facet_id_exists_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_pair_proximity_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_pair_proximity_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_pair_proximity_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_pair_proximity_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/documents_ids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/documents_ids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/documents_ids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/documents_ids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_docids.snap similarity index 100% rename from 
milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_pair_proximity_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_pair_proximity_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_pair_proximity_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_pair_proximity_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_exists_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_exists_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_exists_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_exists_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/word_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/word_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/word_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/word_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/word_pair_proximity_docids.snap 
b/crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/word_pair_proximity_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/word_pair_proximity_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/word_pair_proximity_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/initial/word_docids.snap diff --git a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap b/crates/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap similarity index 100% rename from milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap rename to crates/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap diff --git a/milli/src/update/index_documents/transform.rs b/crates/milli/src/update/index_documents/transform.rs similarity index 96% rename from milli/src/update/index_documents/transform.rs rename to crates/milli/src/update/index_documents/transform.rs index 84135ff24..7239e8bff 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/crates/milli/src/update/index_documents/transform.rs @@ -996,27 +996,24 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: Result, &RoaringBitmap)>> = settings_diff + let readers: BTreeMap<&str, (ArroyWrapper, &RoaringBitmap)> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { if let Some(WriteBackToDocuments { embedder_id, user_provided }) = action.write_back() { - let readers: Result> = self - .index - 
.arroy_readers(wtxn, *embedder_id, action.was_quantized) - .collect(); - match readers { - Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), - Err(error) => Some(Err(error)), - } + let reader = ArroyWrapper::new( + self.index.vector_arroy, + *embedder_id, + action.was_quantized, + ); + Some((name.as_str(), (reader, user_provided))) } else { None } }) .collect(); - let readers = readers?; let old_vectors_fid = settings_diff .old @@ -1055,34 +1052,24 @@ impl<'a, 'i> Transform<'a, 'i> { arroy::Error, > = readers .iter() - .filter_map(|(name, (readers, user_provided))| { + .filter_map(|(name, (reader, user_provided))| { if !user_provided.contains(docid) { return None; } - let mut vectors = Vec::new(); - for reader in readers { - let Some(vector) = reader.item_vector(wtxn, docid).transpose() else { - break; - }; - - match vector { - Ok(vector) => vectors.push(vector), - Err(error) => return Some(Err(error)), - } + match reader.item_vectors(wtxn, docid) { + Ok(vectors) if vectors.is_empty() => None, + Ok(vectors) => Some(Ok(( + name.to_string(), + serde_json::to_value(ExplicitVectors { + embeddings: Some( + VectorOrArrayOfVectors::from_array_of_vectors(vectors), + ), + regenerate: false, + }) + .unwrap(), + ))), + Err(e) => Some(Err(e)), } - if vectors.is_empty() { - return None; - } - Some(Ok(( - name.to_string(), - serde_json::to_value(ExplicitVectors { - embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors( - vectors, - )), - regenerate: false, - }) - .unwrap(), - ))) }) .collect(); @@ -1111,11 +1098,9 @@ impl<'a, 'i> Transform<'a, 'i> { } // delete all vectors from the embedders that need removal - for (_, (readers, _)) in readers { - for reader in readers { - let dimensions = reader.dimensions(wtxn)?; - reader.clear(wtxn, dimensions)?; - } + for (_, (reader, _)) in readers { + let dimensions = reader.dimensions(wtxn)?; + reader.clear(wtxn, dimensions)?; } let grenad_params = GrenadParameters { diff --git a/milli/src/update/index_documents/typed_chunk.rs b/crates/milli/src/update/index_documents/typed_chunk.rs similarity index 92% rename from milli/src/update/index_documents/typed_chunk.rs rename to crates/milli/src/update/index_documents/typed_chunk.rs index 1f477cf4b..2c30220bc 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/crates/milli/src/update/index_documents/typed_chunk.rs @@ -668,22 +668,14 @@ pub(crate) fn write_typed_chunk_into_index( .get(&embedder_name) .map_or(false, |conf| conf.2); // FIXME: allow customizing distance - let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) - .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized)) - .collect(); + let writer = ArroyWrapper::new(index.vector_arroy, embedder_index, binary_quantized); // remove vectors for docids we want them removed let merger = remove_vectors_builder.build(); let mut iter = merger.into_stream_merger_iter()?; while let Some((key, _)) = iter.next()? { let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap(); - - for writer in &writers { - // Uses invariant: vectors are packed in the first writers. - if !writer.del_item(wtxn, expected_dimension, docid)? 
{ - break; - } - } + writer.del_items(wtxn, expected_dimension, docid)?; } // add generated embeddings @@ -711,9 +703,7 @@ pub(crate) fn write_typed_chunk_into_index( embeddings.embedding_count(), ))); } - for (embedding, writer) in embeddings.iter().zip(&writers) { - writer.add_item(wtxn, expected_dimension, docid, embedding)?; - } + writer.add_items(wtxn, docid, &embeddings)?; } // perform the manual diff @@ -728,51 +718,14 @@ pub(crate) fn write_typed_chunk_into_index( if let Some(value) = vector_deladd_obkv.get(DelAdd::Deletion) { let vector: Vec<f32> = pod_collect_to_vec(value); - let mut deleted_index = None; - for (index, writer) in writers.iter().enumerate() { - let Some(candidate) = writer.item_vector(wtxn, docid)? else { - // uses invariant: vectors are packed in the first writers. - break; - }; - if candidate == vector { - writer.del_item(wtxn, expected_dimension, docid)?; - deleted_index = Some(index); - } - } - - // 🥲 enforce invariant: vectors are packed in the first writers. - if let Some(deleted_index) = deleted_index { - let mut last_index_with_a_vector = None; - for (index, writer) in writers.iter().enumerate().skip(deleted_index) { - let Some(candidate) = writer.item_vector(wtxn, docid)? else { - break; - }; - last_index_with_a_vector = Some((index, candidate)); - } - if let Some((last_index, vector)) = last_index_with_a_vector { - // unwrap: computed the index from the list of writers - let writer = writers.get(last_index).unwrap(); - writer.del_item(wtxn, expected_dimension, docid)?; - writers.get(deleted_index).unwrap().add_item( - wtxn, - expected_dimension, - docid, - &vector, - )?; - } - } + writer.del_item(wtxn, docid, &vector)?; } if let Some(value) = vector_deladd_obkv.get(DelAdd::Addition) { let vector = pod_collect_to_vec(value); // overflow was detected during vector extraction. - for writer in &writers { - if !writer.contains_item(wtxn, expected_dimension, docid)? { - writer.add_item(wtxn, expected_dimension, docid, &vector)?; - break; - } - } + writer.add_item(wtxn, docid, &vector)?; } } diff --git a/milli/src/update/indexer_config.rs b/crates/milli/src/update/indexer_config.rs similarity index 100% rename from milli/src/update/indexer_config.rs rename to crates/milli/src/update/indexer_config.rs diff --git a/milli/src/update/mod.rs b/crates/milli/src/update/mod.rs similarity index 100% rename from milli/src/update/mod.rs rename to crates/milli/src/update/mod.rs diff --git a/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs similarity index 58% rename from milli/src/update/new/channel.rs rename to crates/milli/src/update/new/channel.rs index 92f692a88..dee82e6d9 100644 --- a/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -1,44 +1,37 @@ -use std::fs::File; use std::marker::PhantomData; +use std::sync::atomic::{AtomicUsize, Ordering}; use crossbeam_channel::{IntoIter, Receiver, SendError, Sender}; use grenad::Merger; use hashbrown::HashMap; use heed::types::Bytes; -use memmap2::Mmap; use roaring::RoaringBitmap; use super::extract::FacetKind; use super::StdResult; -use crate::index::main_key::{DOCUMENTS_IDS_KEY, WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; +use crate::index::main_key::DOCUMENTS_IDS_KEY; use crate::update::new::KvReaderFieldId; use crate::update::MergeDeladdCboRoaringBitmaps; use crate::vector::Embedding; use crate::{DocumentId, Index}; /// The capacity of the channel is currently in number of messages.
-pub fn merger_writer_channel(cap: usize) -> (MergerSender, WriterReceiver) { +pub fn extractor_writer_channel(cap: usize) -> (ExtractorSender, WriterReceiver) { let (sender, receiver) = crossbeam_channel::bounded(cap); ( - MergerSender { + ExtractorSender { sender, send_count: Default::default(), writer_contentious_count: Default::default(), - merger_contentious_count: Default::default(), + extractor_contentious_count: Default::default(), }, WriterReceiver(receiver), ) } -/// The capacity of the channel is currently in number of messages. -pub fn extractors_merger_channels(cap: usize) -> (ExtractorSender, MergerReceiver) { - let (sender, receiver) = crossbeam_channel::bounded(cap); - (ExtractorSender(sender), MergerReceiver(receiver)) -} - -pub enum KeyValueEntry { - SmallInMemory { key_length: usize, data: Box<[u8]> }, - LargeOnDisk { key: Box<[u8]>, value: Mmap }, +pub struct KeyValueEntry { + pub key_length: usize, + pub data: Box<[u8]>, } impl KeyValueEntry { @@ -46,32 +39,22 @@ impl KeyValueEntry { let mut data = Vec::with_capacity(key.len() + value.len()); data.extend_from_slice(key); data.extend_from_slice(value); - KeyValueEntry::SmallInMemory { key_length: key.len(), data: data.into_boxed_slice() } + KeyValueEntry { key_length: key.len(), data: data.into_boxed_slice() } } pub fn from_small_key_bitmap(key: &[u8], bitmap: RoaringBitmap) -> Self { let mut data = Vec::with_capacity(key.len() + bitmap.serialized_size()); data.extend_from_slice(key); bitmap.serialize_into(&mut data).unwrap(); - KeyValueEntry::SmallInMemory { key_length: key.len(), data: data.into_boxed_slice() } - } - - pub fn from_large_key_value(key: &[u8], value: Mmap) -> Self { - KeyValueEntry::LargeOnDisk { key: key.to_vec().into_boxed_slice(), value } + KeyValueEntry { key_length: key.len(), data: data.into_boxed_slice() } } pub fn key(&self) -> &[u8] { - match self { - KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[..*key_length], - KeyValueEntry::LargeOnDisk { key, value: _ } => key.as_ref(), - } + &self.data[..self.key_length] } pub fn value(&self) -> &[u8] { - match self { - KeyValueEntry::SmallInMemory { key_length, data } => &data.as_ref()[*key_length..], - KeyValueEntry::LargeOnDisk { key: _, value } => value.as_ref(), - } + &self.data[self.key_length..] } } @@ -94,37 +77,6 @@ pub enum EntryOperation { Write(KeyValueEntry), } -pub struct DocumentEntry { - docid: DocumentId, - content: Box<[u8]>, -} - -impl DocumentEntry { - pub fn new_uncompressed(docid: DocumentId, content: Box<KvReaderFieldId>) -> Self { - DocumentEntry { docid, content: content.into() } - } - - pub fn new_compressed(docid: DocumentId, content: Box<[u8]>) -> Self { - DocumentEntry { docid, content } - } - - pub fn key(&self) -> [u8; 4] { - self.docid.to_be_bytes() - } - - pub fn content(&self) -> &[u8] { - &self.content - } -} - -pub struct DocumentDeletionEntry(DocumentId); - -impl DocumentDeletionEntry { - pub fn key(&self) -> [u8; 4] { - self.0.to_be_bytes() - } -} - pub enum WriterOperation { DbOperation(DbOperation), ArroyOperation(ArroyOperation), @@ -227,34 +179,32 @@ impl IntoIterator for WriterReceiver { } } -pub struct MergerSender { +pub struct ExtractorSender { sender: Sender<WriterOperation>, - /// The number of message we send in total in the channel. - send_count: std::cell::Cell<usize>, + /// The number of messages we sent in total in the channel. + send_count: AtomicUsize, /// The number of times we sent something in a channel that was full.
- writer_contentious_count: std::cell::Cell<usize>, + writer_contentious_count: AtomicUsize, /// The number of times we sent something in a channel that was empty. - merger_contentious_count: std::cell::Cell<usize>, + extractor_contentious_count: AtomicUsize, } -impl Drop for MergerSender { +impl Drop for ExtractorSender { fn drop(&mut self) { + let send_count = *self.send_count.get_mut(); + let writer_contentious_count = *self.writer_contentious_count.get_mut(); + let extractor_contentious_count = *self.extractor_contentious_count.get_mut(); eprintln!( - "Merger channel stats: {} sends, {} writer contentions ({}%), {} merger contentions ({}%)", - self.send_count.get(), - self.writer_contentious_count.get(), - (self.writer_contentious_count.get() as f32 / self.send_count.get() as f32) * 100.0, - self.merger_contentious_count.get(), - (self.merger_contentious_count.get() as f32 / self.send_count.get() as f32) * 100.0 + "Extractor channel stats: {send_count} sends, \ + {writer_contentious_count} writer contentions ({}%), \ + {extractor_contentious_count} extractor contentions ({}%)", + (writer_contentious_count as f32 / send_count as f32) * 100.0, + (extractor_contentious_count as f32 / send_count as f32) * 100.0 ) } } -impl MergerSender { - pub fn main(&self) -> MainSender<'_> { - MainSender(self) - } - +impl ExtractorSender { pub fn docids<D: DatabaseType>(&self) -> WordDocidsSender<'_, D> { WordDocidsSender { sender: self, _marker: PhantomData } } @@ -280,12 +230,13 @@ impl MergerSender { fn send_db_operation(&self, op: DbOperation) -> StdResult<(), SendError<()>> { if self.sender.is_full() { - self.writer_contentious_count.set(self.writer_contentious_count.get() + 1); + self.writer_contentious_count.fetch_add(1, Ordering::SeqCst); } if self.sender.is_empty() { - self.merger_contentious_count.set(self.merger_contentious_count.get() + 1); + self.extractor_contentious_count.fetch_add(1, Ordering::SeqCst); } - self.send_count.set(self.send_count.get() + 1); + + self.send_count.fetch_add(1, Ordering::SeqCst); match self.sender.send(WriterOperation::DbOperation(op)) { Ok(()) => Ok(()), Err(SendError(_)) => Err(SendError(())), @@ -293,129 +244,48 @@ impl MergerSender { } } -pub struct MainSender<'a>(&'a MergerSender); - -impl MainSender<'_> { - pub fn write_words_fst(&self, value: Mmap) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Write(KeyValueEntry::from_large_key_value( - WORDS_FST_KEY.as_bytes(), - value, - )); - match self.0.send_db_operation(DbOperation { database: Database::Main, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } - - pub fn write_words_prefixes_fst(&self, value: Mmap) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Write(KeyValueEntry::from_large_key_value( - WORDS_PREFIXES_FST_KEY.as_bytes(), - value, - )); - match self.0.send_db_operation(DbOperation { database: Database::Main, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } - - pub fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Delete(KeyEntry::from_key(key)); - match self.0.send_db_operation(DbOperation { database: Database::Main, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } -} - pub enum ExactWordDocids {} pub enum FidWordCountDocids {} pub enum WordDocids {} pub enum WordFidDocids {} pub enum WordPairProximityDocids {} pub enum WordPositionDocids {} -pub enum FacetDocids {} pub trait DatabaseType { const DATABASE: Database; } -pub trait MergerOperationType { - fn
new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation; -} - impl DatabaseType for ExactWordDocids { const DATABASE: Database = Database::ExactWordDocids; } -impl MergerOperationType for ExactWordDocids { - fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { - MergerOperation::ExactWordDocidsMerger(merger) - } -} - impl DatabaseType for FidWordCountDocids { const DATABASE: Database = Database::FidWordCountDocids; } -impl MergerOperationType for FidWordCountDocids { - fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { - MergerOperation::FidWordCountDocidsMerger(merger) - } -} - impl DatabaseType for WordDocids { const DATABASE: Database = Database::WordDocids; } -impl MergerOperationType for WordDocids { - fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { - MergerOperation::WordDocidsMerger(merger) - } -} - impl DatabaseType for WordFidDocids { const DATABASE: Database = Database::WordFidDocids; } -impl MergerOperationType for WordFidDocids { - fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { - MergerOperation::WordFidDocidsMerger(merger) - } -} - impl DatabaseType for WordPairProximityDocids { const DATABASE: Database = Database::WordPairProximityDocids; } -impl MergerOperationType for WordPairProximityDocids { - fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { - MergerOperation::WordPairProximityDocidsMerger(merger) - } -} - impl DatabaseType for WordPositionDocids { const DATABASE: Database = Database::WordPositionDocids; } -impl MergerOperationType for WordPositionDocids { - fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { - MergerOperation::WordPositionDocidsMerger(merger) - } -} - -impl MergerOperationType for FacetDocids { - fn new_merger_operation(merger: Merger<File, MergeDeladdCboRoaringBitmaps>) -> MergerOperation { - MergerOperation::FacetDocidsMerger(merger) - } -} - pub trait DocidsSender { fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>>; fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>>; } pub struct WordDocidsSender<'a, D> { - sender: &'a MergerSender, + sender: &'a ExtractorSender, _marker: PhantomData<D>, } @@ -438,7 +308,7 @@ impl<D: DatabaseType> DocidsSender for WordDocidsSender<'_, D> { } pub struct FacetDocidsSender<'a> { - sender: &'a MergerSender, + sender: &'a ExtractorSender, } impl DocidsSender for FacetDocidsSender<'_> { @@ -472,7 +342,7 @@ impl DocidsSender for FacetDocidsSender<'_> { } } -pub struct DocumentsSender<'a>(&'a MergerSender); +pub struct DocumentsSender<'a>(&'a ExtractorSender); impl DocumentsSender<'_> { /// TODO do that efficiently @@ -565,85 +435,3 @@ impl EmbeddingSender<'_> { .map_err(|_| SendError(())) } } -pub enum MergerOperation { - ExactWordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), - FidWordCountDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), - WordDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), - WordFidDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), - WordPairProximityDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), - WordPositionDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), - FacetDocidsMerger(Merger<File, MergeDeladdCboRoaringBitmaps>), - DeleteDocument { docid: DocumentId, external_id: String }, - InsertDocument { docid: DocumentId, external_id: String, document: Box<KvReaderFieldId> }, - FinishedDocument, -} - -pub struct MergerReceiver(Receiver<MergerOperation>); - -impl IntoIterator for MergerReceiver { - type Item = MergerOperation; - type IntoIter = IntoIter<MergerOperation>; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -pub struct ExtractorSender(Sender<MergerOperation>); - -impl ExtractorSender { - pub fn document_sender(&self) -> DocumentSender<'_> { - DocumentSender(Some(&self.0)) - } - - pub fn send_searchable<D: MergerOperationType>( - &self, - merger: Merger<File, MergeDeladdCboRoaringBitmaps>, - ) -> StdResult<(), SendError<()>> { - match
self.0.send(D::new_merger_operation(merger)) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } -} - -pub struct DocumentSender<'a>(Option<&'a Sender<MergerOperation>>); - -impl DocumentSender<'_> { - pub fn insert( - &self, - docid: DocumentId, - external_id: String, - document: Box<KvReaderFieldId>, - ) -> StdResult<(), SendError<()>> { - let sender = self.0.unwrap(); - match sender.send(MergerOperation::InsertDocument { docid, external_id, document }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } - - pub fn delete(&self, docid: DocumentId, external_id: String) -> StdResult<(), SendError<()>> { - let sender = self.0.unwrap(); - match sender.send(MergerOperation::DeleteDocument { docid, external_id }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } - - pub fn finish(mut self) -> StdResult<(), SendError<()>> { - let sender = self.0.take().unwrap(); - match sender.send(MergerOperation::FinishedDocument) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } -} - -impl Drop for DocumentSender<'_> { - fn drop(&mut self) { - if let Some(sender) = self.0.take() { - let _ = sender.send(MergerOperation::FinishedDocument); - } - } -} diff --git a/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs similarity index 100% rename from milli/src/update/new/document.rs rename to crates/milli/src/update/new/document.rs diff --git a/milli/src/update/new/document_change.rs b/crates/milli/src/update/new/document_change.rs similarity index 100% rename from milli/src/update/new/document_change.rs rename to crates/milli/src/update/new/document_change.rs diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs new file mode 100644 index 000000000..63590db69 --- /dev/null +++ b/crates/milli/src/update/new/extract/cache.rs @@ -0,0 +1,729 @@ +//! # How the Merge Algorithm works +//! +//! Each extractor creates #Threads caches and balances the entries +//! based on the hash of the keys. To do that we can use the +//! hashbrown::hash_map::RawEntryBuilderMut::from_key_hashed_nocheck. +//! This way we can compute the hash on our own, decide on the cache to +//! target, and insert it into the right HashMap. +//! +//! #Thread -> caches +//! t1 -> [t1c1, t1c2, t1c3] +//! t2 -> [t2c1, t2c2, t2c3] +//! t3 -> [t3c1, t3c2, t3c3] +//! +//! When the extractors are done filling the caches, we want to merge +//! the content of all the caches. We do a transpose and each thread is +//! assigned the associated cache. By doing that we know that every key +//! is put in a known cache and will collide with keys in the other +//! caches of the other threads. +//! +//! #Thread -> caches +//! t1 -> [t1c1, t2c1, t3c1] +//! t2 -> [t1c2, t2c2, t3c2] +//! t3 -> [t1c3, t2c3, t3c3] +//! +//! When we encounter a miss in the other caches we must still try +//! to find it in the spilled entries. This is the reason why we use +//! a grenad sorter/reader so that we can seek "efficiently" for a key. +//! +//! ## More Detailed Algorithm +//! +//! Each sub-cache has an in-memory HashMap and some spilled +//! lexicographically ordered entries on disk (grenad). We first iterate +//! over the spilled entries of all the caches at once by using a merge +//! join algorithm. This algorithm will merge the entries by using its +//! merge function. +//! +//! Every time a merged entry is emitted by the merge join algorithm we also +//! fetch the value from the other in-memory caches (HashMaps) to finish +//! the merge.
Every time we retrieve an entry from the in-memory caches +//! we mark it with a tombstone for later. +//! +//! Once we are done with the spilled entries we iterate over the in-memory +//! HashMaps. We iterate over the first one, retrieve the content from the +//! other ones and mark them with a tombstone again. We also make sure +//! to ignore the dead (tombstoned) ones. +//! +//! ## Memory Control +//! +//! We can detect that there is no more memory available when the +//! bump allocator reaches a threshold. When this is the case we +//! freeze the cache. There is one bump allocator per thread and the +//! memory must be well balanced as we manage one type of extraction +//! at a time with well-balanced documents. +//! +//! It means that the unknown new keys added to the +//! cache are directly spilled to disk: basically a key followed by a +//! del/add bitmap. For the known keys we can keep modifying them in +//! the materialized version in the cache: update the del/add bitmaps. +//! +//! For now we can use a grenad sorter for spilling even though I think +//! it's not the most efficient way (too many files open, sorting entries). + +use std::cmp::Ordering; +use std::collections::binary_heap::PeekMut; +use std::collections::BinaryHeap; +use std::fs::File; +use std::hash::BuildHasher; +use std::io::BufReader; +use std::{io, iter, mem}; + +use bumpalo::Bump; +use grenad::ReaderCursor; +use hashbrown::hash_map::RawEntryMut; +use hashbrown::HashMap; +use raw_collections::bbbul::{BitPacker, BitPacker4x}; +use raw_collections::map::FrozenMap; +use raw_collections::{Bbbul, FrozenBbbul}; +use roaring::RoaringBitmap; +use rustc_hash::FxBuildHasher; + +use crate::update::del_add::{DelAdd, KvWriterDelAdd}; +use crate::update::new::indexer::document_changes::MostlySend; +use crate::update::new::KvReaderDelAdd; +use crate::update::MergeDeladdCboRoaringBitmaps; +use crate::{CboRoaringBitmapCodec, Result}; + +/// A cache that stores byte keys associated with CboDelAddRoaringBitmaps. +/// +/// Internally balances the content over `N` buckets for future merging.
+/// A cache that stores byte keys associated with CboDelAddRoaringBitmaps. +/// +/// Internally balances the content over `N` buckets for future merging. +pub struct BalancedCaches<'extractor> { + hasher: FxBuildHasher, + alloc: &'extractor Bump, + max_memory: Option<usize>, + caches: InnerCaches<'extractor>, +} + +enum InnerCaches<'extractor> { + Normal(NormalCaches<'extractor>), + Spilling(SpillingCaches<'extractor>), +} + +impl<'extractor> BalancedCaches<'extractor> { + pub fn new_in(buckets: usize, max_memory: Option<usize>, alloc: &'extractor Bump) -> Self { + Self { + hasher: FxBuildHasher, + max_memory, + caches: InnerCaches::Normal(NormalCaches { + caches: iter::repeat_with(|| HashMap::with_hasher_in(FxBuildHasher, alloc)) + .take(buckets) + .collect(), + }), + alloc, + } + } + + fn buckets(&self) -> usize { + match &self.caches { + InnerCaches::Normal(caches) => caches.caches.len(), + InnerCaches::Spilling(caches) => caches.caches.len(), + } + } + + pub fn insert_del_u32(&mut self, key: &[u8], n: u32) -> Result<()> { + if self.max_memory.map_or(false, |mm| self.alloc.allocated_bytes() >= mm) { + self.start_spilling()?; + } + + let buckets = self.buckets(); + match &mut self.caches { + InnerCaches::Normal(normal) => { + normal.insert_del_u32(&self.hasher, self.alloc, buckets, key, n); + Ok(()) + } + InnerCaches::Spilling(spilling) => { + spilling.insert_del_u32(&self.hasher, self.alloc, buckets, key, n) + } + } + } + + pub fn insert_add_u32(&mut self, key: &[u8], n: u32) -> Result<()> { + if self.max_memory.map_or(false, |mm| self.alloc.allocated_bytes() >= mm) { + self.start_spilling()?; + } + + let buckets = self.buckets(); + match &mut self.caches { + InnerCaches::Normal(normal) => { + normal.insert_add_u32(&self.hasher, self.alloc, buckets, key, n); + Ok(()) + } + InnerCaches::Spilling(spilling) => { + spilling.insert_add_u32(&self.hasher, self.alloc, buckets, key, n) + } + } + } + + /// Make sure the cache is no longer allocating data + /// and writes every new and unknown entry to disk. + fn start_spilling(&mut self) -> Result<()> { + let BalancedCaches { hasher: _, alloc, max_memory: _, caches } = self; + + if let InnerCaches::Normal(normal_caches) = caches { + eprintln!( + "We are spilling after we allocated {} bytes on thread #{}", + alloc.allocated_bytes(), + rayon::current_thread_index().unwrap_or(0) + ); + + let allocated: usize = normal_caches.caches.iter().map(|m| m.allocation_size()).sum(); + eprintln!("The in-memory HashMaps take {allocated} bytes in total"); + + let dummy = NormalCaches { caches: Vec::new() }; + let NormalCaches { caches: cache_maps } = mem::replace(normal_caches, dummy); + *caches = InnerCaches::Spilling(SpillingCaches::from_cache_maps(cache_maps)); + } + + Ok(()) + }
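
The spill trigger used by `insert_del_u32`/`insert_add_u32` above boils down to comparing the bump allocator's `allocated_bytes()` against `max_memory`. A minimal sketch of that check, assuming only the `bumpalo` crate (the real switch swaps `NormalCaches` for `SpillingCaches`):

```rust
use bumpalo::Bump;

// Once the per-thread bump allocator crosses max_memory, the cache stops
// materializing new keys in memory and spills them to disk instead.
fn must_spill(alloc: &Bump, max_memory: Option<usize>) -> bool {
    max_memory.map_or(false, |mm| alloc.allocated_bytes() >= mm)
}

fn main() {
    let alloc = Bump::new();
    assert!(!must_spill(&alloc, Some(1024)));
    let _ = alloc.alloc_slice_copy(&[0u8; 2048]);
    assert!(must_spill(&alloc, Some(1024)));
}
```
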
+ + pub fn freeze(&mut self) -> Result<Vec<FrozenCache<'_, 'extractor>>> { + match &mut self.caches { + InnerCaches::Normal(NormalCaches { caches }) => caches + .iter_mut() + .enumerate() + .map(|(bucket, map)| { + // safety: we are transmuting the Bbbul into a FrozenBbbul, + // which has the same size. + let map = unsafe { + std::mem::transmute::< + &mut HashMap< + &[u8], + DelAddBbbul<BitPacker4x>, // from this + FxBuildHasher, + &Bump, + >, + &mut HashMap< + &[u8], + FrozenDelAddBbbul<BitPacker4x>, // to that + FxBuildHasher, + &Bump, + >, + >(map) + }; + Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled: Vec::new() }) + }) + .collect(), + InnerCaches::Spilling(SpillingCaches { caches, spilled_entries, .. }) => caches + .iter_mut() + .zip(mem::take(spilled_entries)) + .enumerate() + .map(|(bucket, (map, sorter))| { + let spilled = sorter + .into_reader_cursors()? + .into_iter() + .map(ReaderCursor::into_inner) + .map(BufReader::new) + .map(|bufreader| grenad::Reader::new(bufreader).map_err(Into::into)) + .collect::<Result<Vec<_>>>()?; + // safety: we are transmuting the Bbbul into a FrozenBbbul, + // which has the same size. + let map = unsafe { + std::mem::transmute::< + &mut HashMap< + &[u8], + DelAddBbbul<BitPacker4x>, // from this + FxBuildHasher, + &Bump, + >, + &mut HashMap< + &[u8], + FrozenDelAddBbbul<BitPacker4x>, // to that + FxBuildHasher, + &Bump, + >, + >(map) + }; + Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled }) + }) + .collect(), + } + } +} + +unsafe impl MostlySend for BalancedCaches<'_> {} + +struct NormalCaches<'extractor> { + caches: Vec< + HashMap< + &'extractor [u8], + DelAddBbbul<'extractor, BitPacker4x>, + FxBuildHasher, + &'extractor Bump, + >, + >, +} + +impl<'extractor> NormalCaches<'extractor> { + pub fn insert_del_u32( + &mut self, + hasher: &FxBuildHasher, + alloc: &'extractor Bump, + buckets: usize, + key: &[u8], + n: u32, + ) { + let hash = hasher.hash_one(key); + let bucket = compute_bucket_from_hash(buckets, hash); + + match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) { + RawEntryMut::Occupied(mut entry) => { + entry.get_mut().del.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n); + } + RawEntryMut::Vacant(entry) => { + entry.insert_hashed_nocheck( + hash, + alloc.alloc_slice_copy(key), + DelAddBbbul::new_del_u32_in(n, alloc), + ); + } + } + } + + pub fn insert_add_u32( + &mut self, + hasher: &FxBuildHasher, + alloc: &'extractor Bump, + buckets: usize, + key: &[u8], + n: u32, + ) { + let hash = hasher.hash_one(key); + let bucket = compute_bucket_from_hash(buckets, hash); + match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) { + RawEntryMut::Occupied(mut entry) => { + entry.get_mut().add.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n); + } + RawEntryMut::Vacant(entry) => { + entry.insert_hashed_nocheck( + hash, + alloc.alloc_slice_copy(key), + DelAddBbbul::new_add_u32_in(n, alloc), + ); + } + } + } +} + +struct SpillingCaches<'extractor> { + caches: Vec< + HashMap< + &'extractor [u8], + DelAddBbbul<'extractor, BitPacker4x>, + FxBuildHasher, + &'extractor Bump, + >, + >, + spilled_entries: Vec<grenad::Sorter<MergeDeladdCboRoaringBitmaps>>, + deladd_buffer: Vec<u8>, + cbo_buffer: Vec<u8>, +} + +impl<'extractor> SpillingCaches<'extractor> { + fn from_cache_maps( + caches: Vec< + HashMap< + &'extractor [u8], + DelAddBbbul<'extractor, BitPacker4x>, + FxBuildHasher, + &'extractor Bump, + >, + >, + ) -> SpillingCaches<'extractor> { + SpillingCaches { + spilled_entries: iter::repeat_with(|| { + let mut builder = grenad::SorterBuilder::new(MergeDeladdCboRoaringBitmaps); + builder.dump_threshold(0); + builder.allow_realloc(false); + builder.build() + }) + .take(caches.len()) + .collect(), + caches, + deladd_buffer: Vec::new(), + cbo_buffer: Vec::new(), + } + } + + pub fn insert_del_u32( + &mut self, + hasher: &FxBuildHasher, + alloc: &'extractor Bump, + buckets: usize, + key: &[u8], + n: u32, + ) -> Result<()> { + let hash = hasher.hash_one(key); + let bucket = compute_bucket_from_hash(buckets, hash); + match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) { + RawEntryMut::Occupied(mut entry) => { + entry.get_mut().del.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n); + Ok(()) + } + RawEntryMut::Vacant(_entry) => spill_entry_to_sorter( + &mut self.spilled_entries[bucket], + &mut self.deladd_buffer, + &mut self.cbo_buffer, + key, + DelAddRoaringBitmap::new_del_u32(n), + ), + } + } + + pub fn insert_add_u32( + &mut self,
hasher: &FxBuildHasher, + alloc: &'extractor Bump, + buckets: usize, + key: &[u8], + n: u32, + ) -> Result<()> { + let hash = hasher.hash_one(key); + let bucket = compute_bucket_from_hash(buckets, hash); + match self.caches[bucket].raw_entry_mut().from_hash(hash, |&k| k == key) { + RawEntryMut::Occupied(mut entry) => { + entry.get_mut().add.get_or_insert_with(|| Bbbul::new_in(alloc)).insert(n); + Ok(()) + } + RawEntryMut::Vacant(_entry) => spill_entry_to_sorter( + &mut self.spilled_entries[bucket], + &mut self.deladd_buffer, + &mut self.cbo_buffer, + key, + DelAddRoaringBitmap::new_add_u32(n), + ), + } + } +} + +#[inline] +fn compute_bucket_from_hash(buckets: usize, hash: u64) -> usize { + hash as usize % buckets +} + +fn spill_entry_to_sorter( + spilled_entries: &mut grenad::Sorter<MergeDeladdCboRoaringBitmaps>, + deladd_buffer: &mut Vec<u8>, + cbo_buffer: &mut Vec<u8>, + key: &[u8], + deladd: DelAddRoaringBitmap, +) -> Result<()> { + deladd_buffer.clear(); + let mut value_writer = KvWriterDelAdd::new(deladd_buffer); + + match deladd { + DelAddRoaringBitmap { del: Some(del), add: None } => { + cbo_buffer.clear(); + CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer); + value_writer.insert(DelAdd::Deletion, &cbo_buffer)?; + } + DelAddRoaringBitmap { del: None, add: Some(add) } => { + cbo_buffer.clear(); + CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer); + value_writer.insert(DelAdd::Addition, &cbo_buffer)?; + } + DelAddRoaringBitmap { del: Some(del), add: Some(add) } => { + cbo_buffer.clear(); + CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer); + value_writer.insert(DelAdd::Deletion, &cbo_buffer)?; + + cbo_buffer.clear(); + CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer); + value_writer.insert(DelAdd::Addition, &cbo_buffer)?; + } + DelAddRoaringBitmap { del: None, add: None } => return Ok(()), + } + + let bytes = value_writer.into_inner().unwrap(); + spilled_entries.insert(key, bytes).map_err(Into::into) +} + +pub struct FrozenCache<'a, 'extractor> { + bucket: usize, + cache: FrozenMap< + 'a, + 'extractor, + &'extractor [u8], + FrozenDelAddBbbul<'extractor, BitPacker4x>, + FxBuildHasher, + >, + spilled: Vec<grenad::Reader<BufReader<File>>>, +} + +pub fn transpose_and_freeze_caches<'a, 'extractor>( + caches: &'a mut [BalancedCaches<'extractor>], +) -> Result<Vec<Vec<FrozenCache<'a, 'extractor>>>> { + let width = caches.first().map(BalancedCaches::buckets).unwrap_or(0); + let mut bucket_caches: Vec<_> = iter::repeat_with(Vec::new).take(width).collect(); + + for thread_cache in caches { + for frozen in thread_cache.freeze()? { + bucket_caches[frozen.bucket].push(frozen); + } + } + + Ok(bucket_caches) +} + +/// Merges the caches, which must all be associated with the same bucket. +/// +/// # Panics +/// +/// - If the bucket IDs in these frozen caches are not exactly the same. +pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()> +where + F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>, +{ + let mut maps = Vec::new(); + let mut readers = Vec::new(); + let mut current_bucket = None; + for FrozenCache { bucket, cache, ref mut spilled } in frozen { + assert_eq!(*current_bucket.get_or_insert(bucket), bucket); + maps.push(cache); + readers.append(spilled); + }
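
`compute_bucket_from_hash` is what makes this per-bucket merge sound: every thread hashes a key with the same hasher, so a given key always lands in the same bucket no matter which extraction thread sees it. A standalone sketch of that routing (using a std hasher in place of `FxBuildHasher`):

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{BuildHasher, BuildHasherDefault};

// Same `hash % buckets` routing as compute_bucket_from_hash: identical
// hashers on every thread mean identical bucket assignments.
fn bucket_of(key: &[u8], buckets: usize) -> usize {
    let hasher = BuildHasherDefault::<DefaultHasher>::default();
    hasher.hash_one(key) as usize % buckets
}

fn main() {
    let buckets = 3;
    // "Thread 1" and "thread 2" computing the bucket independently agree.
    assert_eq!(bucket_of(b"doggo", buckets), bucket_of(b"doggo", buckets));
    assert!(bucket_of(b"doggo", buckets) < buckets);
}
```
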
+ + // First manage the spilled entries by looking them up in the HashMaps, + // merging them and marking them with a tombstone. + let mut heap = BinaryHeap::new(); + for (source_index, source) in readers.into_iter().enumerate() { + let mut cursor = source.into_cursor()?; + if cursor.move_on_next()?.is_some() { + heap.push(Entry { cursor, source_index }); + } + } + + loop { + let mut first_entry = match heap.pop() { + Some(entry) => entry, + None => break, + }; + + let (first_key, first_value) = match first_entry.cursor.current() { + Some((key, value)) => (key, value), + None => break, + }; + + let mut output = DelAddRoaringBitmap::from_bytes(first_value)?; + while let Some(mut entry) = heap.peek_mut() { + if let Some((key, value)) = entry.cursor.current() { + if first_key == key { + let new = DelAddRoaringBitmap::from_bytes(value)?; + output = output.merge(new); + // When we are done with the current value of this entry we make + // it move forward and let the heap reorganize itself (on drop). + if entry.cursor.move_on_next()?.is_none() { + PeekMut::pop(entry); + } + } else { + break; + } + } + } + + // Once we have merged all of the spilled bitmaps we must also + // fetch the entries from the non-spilled caches (the HashMaps). + for (map_index, map) in maps.iter_mut().enumerate() { + if first_entry.source_index != map_index { + if let Some(new) = map.get_mut(first_key) { + output.append_and_clear_bbbul(new); + } + } + } + + // We send the merged entry outside. + (f)(first_key, output)?; + + // Don't forget to put the first entry back into the heap. + if first_entry.cursor.move_on_next()?.is_some() { + heap.push(first_entry) + } + }
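
The loop above is a classic k-way merge join driven by a `BinaryHeap`. A simplified editorial sketch over plain sorted vectors instead of grenad cursors, with the del/add bitmaps replaced by integer counters for brevity:

```rust
use std::cmp::Reverse;
use std::collections::BinaryHeap;

// Repeatedly pop the smallest head, fold every source that carries the same
// key, then advance that source and let the heap reorder itself.
fn merge_join(sources: Vec<Vec<(&str, u32)>>) -> Vec<(String, u32)> {
    let mut heap = BinaryHeap::new();
    for (source_index, source) in sources.iter().enumerate() {
        if let Some(&(key, _)) = source.first() {
            heap.push(Reverse((key, source_index, 0usize)));
        }
    }

    let mut out: Vec<(String, u32)> = Vec::new();
    while let Some(Reverse((key, source_index, offset))) = heap.pop() {
        let value = sources[source_index][offset].1;
        match out.last_mut() {
            // Same key as the previously emitted entry: merge the values.
            Some((last, acc)) if last.as_str() == key => *acc += value,
            // New key: emit a fresh entry.
            _ => out.push((key.to_string(), value)),
        }
        if let Some(&(next_key, _)) = sources[source_index].get(offset + 1) {
            heap.push(Reverse((next_key, source_index, offset + 1)));
        }
    }
    out
}

fn main() {
    let merged = merge_join(vec![
        vec![("apple", 1), ("doggo", 2)],
        vec![("doggo", 3), ("kefir", 4)],
    ]);
    assert_eq!(merged, vec![("apple".into(), 1), ("doggo".into(), 5), ("kefir".into(), 4)]);
}
```
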
+ + // Then manage the content of the HashMap entries that weren't taken (mem::take). + while let Some(mut map) = maps.pop() { + for (key, bbbul) in map.iter_mut() { + // Make sure we don't try to work with entries already managed by the spilled ones. + if bbbul.is_empty() { + continue; + } + + let mut output = DelAddRoaringBitmap::empty(); + output.append_and_clear_bbbul(bbbul); + + for rhs in maps.iter_mut() { + if let Some(new) = rhs.get_mut(key) { + output.append_and_clear_bbbul(new); + } + } + + // We send the merged entry outside. + (f)(key, output)?; + } + } + + Ok(()) +} + +struct Entry { + cursor: ReaderCursor<BufReader<File>>, + source_index: usize, +} + +impl Ord for Entry { + fn cmp(&self, other: &Entry) -> Ordering { + let skey = self.cursor.current().map(|(k, _)| k); + let okey = other.cursor.current().map(|(k, _)| k); + skey.cmp(&okey).then(self.source_index.cmp(&other.source_index)).reverse() + } +} + +impl Eq for Entry {} + +impl PartialEq for Entry { + fn eq(&self, other: &Entry) -> bool { + self.cmp(other) == Ordering::Equal + } +} + +impl PartialOrd for Entry { + fn partial_cmp(&self, other: &Entry) -> Option<Ordering> { + Some(self.cmp(other)) + } +} + +pub struct DelAddBbbul<'bump, B> { + pub del: Option<Bbbul<'bump, B>>, + pub add: Option<Bbbul<'bump, B>>, +} + +impl<'bump, B: BitPacker> DelAddBbbul<'bump, B> { + pub fn insert_del_u32_in(&mut self, n: u32, bump: &'bump Bump) { + self.del.get_or_insert_with(|| Bbbul::new_in(bump)).insert(n); + } + + pub fn insert_add_u32_in(&mut self, n: u32, bump: &'bump Bump) { + self.add.get_or_insert_with(|| Bbbul::new_in(bump)).insert(n); + } + + pub fn new_del_u32_in(n: u32, bump: &'bump Bump) -> Self { + let mut bbbul = Bbbul::new_in(bump); + bbbul.insert(n); + DelAddBbbul { del: Some(bbbul), add: None } + } + + pub fn new_add_u32_in(n: u32, bump: &'bump Bump) -> Self { + let mut bbbul = Bbbul::new_in(bump); + bbbul.insert(n); + DelAddBbbul { del: None, add: Some(bbbul) } + } +} + +pub struct FrozenDelAddBbbul<'bump, B> { + pub del: Option<FrozenBbbul<'bump, B>>, + pub add: Option<FrozenBbbul<'bump, B>>, +} + +impl<'bump, B> FrozenDelAddBbbul<'bump, B> { + fn is_empty(&self) -> bool { + self.del.is_none() && self.add.is_none() + } +} + +#[derive(Debug, Default, Clone)] +pub struct DelAddRoaringBitmap { + pub del: Option<RoaringBitmap>, + pub add: Option<RoaringBitmap>, +} + +impl DelAddRoaringBitmap { + fn from_bytes(bytes: &[u8]) -> io::Result<DelAddRoaringBitmap> { + let reader = KvReaderDelAdd::from_slice(bytes); + + let del = match reader.get(DelAdd::Deletion) { + Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?, + None => None, + }; + + let add = match reader.get(DelAdd::Addition) { + Some(bytes) => CboRoaringBitmapCodec::deserialize_from(bytes).map(Some)?, + None => None, + }; + + Ok(DelAddRoaringBitmap { del, add }) + } + + pub fn empty() -> DelAddRoaringBitmap { + DelAddRoaringBitmap { del: None, add: None } + } + + pub fn is_empty(&self) -> bool { + let DelAddRoaringBitmap { del, add } = self; + del.is_none() && add.is_none() + } + + pub fn insert_del_u32(&mut self, n: u32) { + self.del.get_or_insert_with(RoaringBitmap::new).insert(n); + } + + pub fn insert_add_u32(&mut self, n: u32) { + self.add.get_or_insert_with(RoaringBitmap::new).insert(n); + } + + pub fn new_del_u32(n: u32) -> Self { + DelAddRoaringBitmap { del: Some(RoaringBitmap::from([n])), add: None } + } + + pub fn new_add_u32(n: u32) -> Self { + DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) } + } + + pub fn append_and_clear_bbbul<B: BitPacker>(&mut self, bbbul: &mut FrozenDelAddBbbul<'_, B>) { + let FrozenDelAddBbbul { del, add } = bbbul; + + if let Some(ref mut bbbul) = del.take() { + let del = self.del.get_or_insert_with(RoaringBitmap::new); + let mut iter = bbbul.iter_and_clear(); + while let Some(block) = iter.next_block() { + del.append(block.iter().copied()); + } + } + + if let Some(ref mut bbbul) = add.take() { + let add = self.add.get_or_insert_with(RoaringBitmap::new); + let mut iter = bbbul.iter_and_clear(); + while let Some(block) = iter.next_block() { + add.append(block.iter().copied()); + } + } + } + + pub fn merge(self, rhs: DelAddRoaringBitmap) -> DelAddRoaringBitmap { + let DelAddRoaringBitmap { del, add } = self; + let DelAddRoaringBitmap { del: ndel, add: nadd } = rhs; + + let del = match (del, ndel) { + (None, None) => None, + (None, Some(del)) | (Some(del), None) => Some(del), + (Some(del), Some(ndel)) => Some(del | ndel), + }; + + let add = match (add, nadd) { + (None, None) => None, + (None, Some(add)) | (Some(add), None) => Some(add), + (Some(add), Some(nadd)) => Some(add | nadd), + }; + + DelAddRoaringBitmap { del, add } + } + + pub fn apply_to(&self, documents_ids: &mut RoaringBitmap) { + let DelAddRoaringBitmap { del, add } = self; + + if let Some(del) = del { + *documents_ids -= del; + } + + if let Some(add) = add { + *documents_ids |= add; + } + } +}
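
The del/add pair that `merge_caches` emits is eventually applied to the index the way `apply_to` does: deletions are removed before additions are inserted. A minimal editorial sketch using the `roaring` crate directly:

```rust
use roaring::RoaringBitmap;

// Mirrors DelAddRoaringBitmap::apply_to above: subtract the deletion
// bitmap, then union the addition bitmap into the documents ids.
fn main() {
    let mut documents_ids: RoaringBitmap = [1u32, 2, 3].into_iter().collect();

    let del: RoaringBitmap = [2u32].into_iter().collect();
    let add: RoaringBitmap = [4u32].into_iter().collect();

    documents_ids -= &del;
    documents_ids |= &add;

    assert_eq!(documents_ids.iter().collect::<Vec<u32>>(), vec![1, 3, 4]);
}
```
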
diff --git a/crates/milli/src/update/new/extract/documents.rs b/crates/milli/src/update/new/extract/documents.rs new file mode 100644 index 000000000..21fe4d518 --- /dev/null +++ b/crates/milli/src/update/new/extract/documents.rs @@ -0,0 +1,73 @@ +use std::cell::RefCell; + +use bumpalo::Bump; + +use super::DelAddRoaringBitmap; +use crate::update::new::channel::DocumentsSender; +use crate::update::new::document::write_to_obkv; +use crate::update::new::indexer::document_changes::{ + DocumentChangeContext, Extractor, FullySend, RefCellExt as _, +}; +use crate::update::new::DocumentChange; +use crate::Result; + +pub struct DocumentsExtractor<'a> { + documents_sender: &'a DocumentsSender<'a>, +} + +impl<'a> DocumentsExtractor<'a> { + pub fn new(documents_sender: &'a DocumentsSender<'a>) -> Self { + Self { documents_sender } + } +} + +impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> { + type Data = FullySend<RefCell<DelAddRoaringBitmap>>; + + fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result<Self::Data> { + Ok(FullySend(RefCell::new(DelAddRoaringBitmap::empty()))) + } + + fn process( + &self, + change: DocumentChange, + context: &DocumentChangeContext<Self::Data>, + ) -> Result<()> { + let mut document_buffer = Vec::new(); + let mut delta_documents_ids = context.data.0.borrow_mut_or_yield(); + + let new_fields_ids_map = context.new_fields_ids_map.borrow_or_yield(); + let new_fields_ids_map = &*new_fields_ids_map; + let new_fields_ids_map = new_fields_ids_map.local_map(); + + let external_docid = change.external_docid().to_owned(); + + // document but we need to create a function that collects and compresses documents.
+ match change { + DocumentChange::Deletion(deletion) => { + let docid = deletion.docid(); + self.documents_sender.delete(docid, external_docid).unwrap(); + delta_documents_ids.insert_del_u32(docid); + } + /// TODO: change NONE by SOME(vector) when implemented + DocumentChange::Update(update) => { + let docid = update.docid(); + let content = + update.new(&context.txn, context.index, &context.db_fields_ids_map)?; + let content = + write_to_obkv(&content, None, new_fields_ids_map, &mut document_buffer)?; + self.documents_sender.uncompressed(docid, external_docid, content).unwrap(); + } + DocumentChange::Insertion(insertion) => { + let docid = insertion.docid(); + let content = insertion.new(); + let content = + write_to_obkv(&content, None, new_fields_ids_map, &mut document_buffer)?; + self.documents_sender.uncompressed(docid, external_docid, content).unwrap(); + delta_documents_ids.insert_add_u32(docid); + // extracted_dictionary_sender.send(self, dictionary: &[u8]); + } + } + Ok(()) + } +} diff --git a/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs similarity index 68% rename from milli/src/update/new/extract/faceted/extract_facets.rs rename to crates/milli/src/update/new/extract/faceted/extract_facets.rs index 2d740f1a3..108e4d422 100644 --- a/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -1,16 +1,12 @@ use std::cell::RefCell; use std::collections::HashSet; -use std::fmt::Debug; -use std::fs::File; use std::ops::DerefMut as _; use bumpalo::Bump; -use grenad::{MergeFunction, Merger}; use heed::RoTxn; -use rayon::iter::{ParallelBridge as _, ParallelIterator as _}; use serde_json::Value; -use super::super::cache::CboCachedSorter; +use super::super::cache::BalancedCaches; use super::facet_document::extract_document_facets; use super::FacetKind; use crate::facet::value_encoding::f64_into_bytes; @@ -20,38 +16,24 @@ use crate::update::new::indexer::document_changes::{ Progress, RefCellExt, ThreadLocal, }; use crate::update::new::DocumentChange; -use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; +use crate::update::GrenadParameters; use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH}; -pub struct FacetedExtractorData<'extractor> { - attributes_to_extract: &'extractor [&'extractor str], +pub struct FacetedExtractorData<'a> { + attributes_to_extract: &'a [&'a str], grenad_parameters: GrenadParameters, - max_memory: Option, + buckets: usize, } -impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'extractor> { - type Data = FullySend>>; +impl<'a, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a> { + type Data = RefCell>; - fn init_data( - &self, - _extractor_alloc: raw_collections::alloc::RefBump<'extractor>, - ) -> Result { - Ok(FullySend(RefCell::new(CboCachedSorter::new( - // TODO use a better value - 1_000_000.try_into().unwrap(), - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - self.grenad_parameters.chunk_compression_type, - self.grenad_parameters.chunk_compression_level, - self.grenad_parameters.max_nb_chunks, - self.max_memory, - // *NOTE*: this must not be set to true: - // 1. we're already using max parallelism in the pool, so it wouldn't help - // 2. 
it creates correctness issues if it causes to yield a borrow-mut wielding task - false, - ), - )))) + fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { + Ok(RefCell::new(BalancedCaches::new_in( + self.buckets, + self.grenad_parameters.max_memory, + extractor_alloc, + ))) } fn process<'doc>( @@ -75,16 +57,14 @@ pub struct FacetedDocidsExtractor; impl FacetedDocidsExtractor { fn extract_document_change( - context: &DocumentChangeContext< - FullySend>>, - >, + context: &DocumentChangeContext>, attributes_to_extract: &[&str], document_change: DocumentChange, ) -> Result<()> { let index = &context.index; let rtxn = &context.txn; let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); - let mut cached_sorter = context.data.0.borrow_mut_or_yield(); + let mut cached_sorter = context.data.borrow_mut_or_yield(); match document_change { DocumentChange::Deletion(inner) => extract_document_facets( attributes_to_extract, @@ -94,7 +74,7 @@ impl FacetedDocidsExtractor { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), - CboCachedSorter::insert_del_u32, + BalancedCaches::insert_del_u32, inner.docid(), fid, value, @@ -110,7 +90,7 @@ impl FacetedDocidsExtractor { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), - CboCachedSorter::insert_del_u32, + BalancedCaches::insert_del_u32, inner.docid(), fid, value, @@ -126,7 +106,7 @@ impl FacetedDocidsExtractor { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), - CboCachedSorter::insert_add_u32, + BalancedCaches::insert_add_u32, inner.docid(), fid, value, @@ -142,7 +122,7 @@ impl FacetedDocidsExtractor { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), - CboCachedSorter::insert_add_u32, + BalancedCaches::insert_add_u32, inner.docid(), fid, value, @@ -152,25 +132,20 @@ impl FacetedDocidsExtractor { } } - fn facet_fn_with_options( + fn facet_fn_with_options<'extractor>( doc_alloc: &Bump, - cached_sorter: &mut CboCachedSorter, - cache_fn: impl Fn(&mut CboCachedSorter, &[u8], u32) -> grenad::Result<(), MF::Error>, + cached_sorter: &mut BalancedCaches<'extractor>, + cache_fn: impl Fn(&mut BalancedCaches<'extractor>, &[u8], u32) -> Result<()>, docid: DocumentId, fid: FieldId, value: &Value, - ) -> Result<()> - where - MF: MergeFunction, - MF::Error: Debug, - grenad::Error: Into, - { + ) -> Result<()> { let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc); // Exists // key: fid buffer.push(FacetKind::Exists as u8); buffer.extend_from_slice(&fid.to_be_bytes()); - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into)?; + cache_fn(cached_sorter, &buffer, docid)?; match value { // Number @@ -185,8 +160,7 @@ impl FacetedDocidsExtractor { buffer.push(0); // level 0 buffer.extend_from_slice(&ordered); buffer.extend_from_slice(&n.to_be_bytes()); - - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } else { Ok(()) } @@ -201,7 +175,7 @@ impl FacetedDocidsExtractor { buffer.extend_from_slice(&fid.to_be_bytes()); buffer.push(0); // level 0 buffer.extend_from_slice(truncated.as_bytes()); - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } // Null // key: fid @@ -209,7 +183,7 @@ impl FacetedDocidsExtractor { buffer.clear(); buffer.push(FacetKind::Null as u8); buffer.extend_from_slice(&fid.to_be_bytes()); - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } // Empty // 
key: fid @@ -217,13 +191,13 @@ impl FacetedDocidsExtractor { buffer.clear(); buffer.push(FacetKind::Empty as u8); buffer.extend_from_slice(&fid.to_be_bytes()); - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } Value::Object(o) if o.is_empty() => { buffer.clear(); buffer.push(FacetKind::Empty as u8); buffer.extend_from_slice(&fid.to_be_bytes()); - cache_fn(cached_sorter, &buffer, docid).map_err(Into::into) + cache_fn(cached_sorter, &buffer, docid) } // Otherwise, do nothing /// TODO: What about Value::Bool? @@ -250,23 +224,20 @@ fn truncate_str(s: &str) -> &str { impl DocidsExtractor for FacetedDocidsExtractor { #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")] - fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>, MSP, SP>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, - extractor_allocs: &mut ThreadLocal>>, + extractor_allocs: &mut ThreadLocal>, finished_steps: u16, total_steps: u16, step_name: &'static str, - ) -> Result> + ) -> Result>> where MSP: Fn() -> bool + Sync, SP: Fn(Progress) + Sync, { - let max_memory = grenad_parameters.max_memory_by_thread(); - let index = indexing_context.index; - let rtxn = index.read_txn()?; let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?; let attributes_to_extract: Vec<_> = @@ -281,7 +252,7 @@ impl DocidsExtractor for FacetedDocidsExtractor { let extractor = FacetedExtractorData { attributes_to_extract: &attributes_to_extract, grenad_parameters, - max_memory, + buckets: rayon::current_num_threads(), }; extract( document_changes, @@ -294,26 +265,7 @@ impl DocidsExtractor for FacetedDocidsExtractor { step_name, )?; } - { - let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps); - let span = - tracing::trace_span!(target: "indexing::documents::extract", "merger_building"); - let _entered = span.enter(); - let readers: Vec<_> = datastore - .into_iter() - .par_bridge() - .map(|cached_sorter| { - let cached_sorter = cached_sorter.0.into_inner(); - let sorter = cached_sorter.into_sorter()?; - sorter.into_reader_cursors() - }) - .collect(); - - for reader in readers { - builder.extend(reader?); - } - Ok(builder.build()) - } + Ok(datastore.into_iter().map(RefCell::into_inner).collect()) } } diff --git a/milli/src/update/new/extract/faceted/facet_document.rs b/crates/milli/src/update/new/extract/faceted/facet_document.rs similarity index 100% rename from milli/src/update/new/extract/faceted/facet_document.rs rename to crates/milli/src/update/new/extract/faceted/facet_document.rs diff --git a/milli/src/update/new/extract/faceted/mod.rs b/crates/milli/src/update/new/extract/faceted/mod.rs similarity index 100% rename from milli/src/update/new/extract/faceted/mod.rs rename to crates/milli/src/update/new/extract/faceted/mod.rs diff --git a/milli/src/update/new/extract/mod.rs b/crates/milli/src/update/new/extract/mod.rs similarity index 93% rename from milli/src/update/new/extract/mod.rs rename to crates/milli/src/update/new/extract/mod.rs index fb02b2c93..7f6b72c93 100644 --- a/milli/src/update/new/extract/mod.rs +++ b/crates/milli/src/update/new/extract/mod.rs @@ -1,15 +1,13 @@ mod cache; +mod documents; mod faceted; -mod lru; mod searchable; mod vectors; -use std::cell::RefCell; -use std::fs::File; - use bumpalo::Bump; +pub use 
cache::{merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap}; +pub use documents::*; pub use faceted::*; -use grenad::Merger; pub use searchable::*; pub use vectors::EmbeddingExtractor; @@ -20,11 +18,11 @@ use crate::update::{GrenadParameters, MergeDeladdCboRoaringBitmaps}; use crate::Result; pub trait DocidsExtractor { - fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>, MSP, SP>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, - extractor_allocs: &mut ThreadLocal>>, + extractor_allocs: &'extractor mut ThreadLocal>, finished_steps: u16, total_steps: u16, step_name: &'static str, diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs new file mode 100644 index 000000000..23bca784f --- /dev/null +++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -0,0 +1,400 @@ +use std::cell::RefCell; +use std::collections::HashMap; +use std::mem::size_of; +use std::ops::DerefMut as _; + +use bumpalo::collections::vec::Vec as BumpVec; +use bumpalo::Bump; +use heed::RoTxn; + +use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; +use crate::update::new::extract::cache::BalancedCaches; +use crate::update::new::extract::perm_json_p::contained_in; +use crate::update::new::indexer::document_changes::{ + for_each_document_change, DocumentChangeContext, DocumentChanges, Extractor, FullySend, + IndexingContext, MostlySend, RefCellExt, ThreadLocal, +}; +use crate::update::new::DocumentChange; +use crate::update::GrenadParameters; +use crate::{bucketed_position, DocumentId, FieldId, Index, Result, MAX_POSITION_PER_ATTRIBUTE}; + +const MAX_COUNTED_WORDS: usize = 30; + +pub struct WordDocidsBalancedCaches<'extractor> { + word_fid_docids: BalancedCaches<'extractor>, + word_docids: BalancedCaches<'extractor>, + exact_word_docids: BalancedCaches<'extractor>, + word_position_docids: BalancedCaches<'extractor>, + fid_word_count_docids: BalancedCaches<'extractor>, + fid_word_count: HashMap, + current_docid: Option, +} + +unsafe impl<'extractor> MostlySend for WordDocidsBalancedCaches<'extractor> {} + +impl<'extractor> WordDocidsBalancedCaches<'extractor> { + /// TODO Make sure to give the same max_memory to all of them, without splitting it + pub fn new_in(buckets: usize, max_memory: Option, alloc: &'extractor Bump) -> Self { + Self { + word_fid_docids: BalancedCaches::new_in(buckets, max_memory, alloc), + word_docids: BalancedCaches::new_in(buckets, max_memory, alloc), + exact_word_docids: BalancedCaches::new_in(buckets, max_memory, alloc), + word_position_docids: BalancedCaches::new_in(buckets, max_memory, alloc), + fid_word_count_docids: BalancedCaches::new_in(buckets, max_memory, alloc), + fid_word_count: HashMap::new(), + current_docid: None, + } + } + + fn insert_add_u32( + &mut self, + field_id: FieldId, + position: u16, + word: &str, + exact: bool, + docid: u32, + bump: &Bump, + ) -> Result<()> { + let word_bytes = word.as_bytes(); + if exact { + self.exact_word_docids.insert_add_u32(word_bytes, docid)?; + } else { + self.word_docids.insert_add_u32(word_bytes, docid)?; + } + + let buffer_size = word_bytes.len() + 1 + size_of::(); + let mut buffer = BumpVec::with_capacity_in(buffer_size, bump); + + buffer.clear(); + buffer.extend_from_slice(word_bytes); + 
buffer.push(0); + buffer.extend_from_slice(&field_id.to_be_bytes()); + self.word_fid_docids.insert_add_u32(&buffer, docid)?; + + let position = bucketed_position(position); + buffer.clear(); + buffer.extend_from_slice(word_bytes); + buffer.push(0); + buffer.extend_from_slice(&position.to_be_bytes()); + self.word_position_docids.insert_add_u32(&buffer, docid)?; + + if self.current_docid.map_or(false, |id| docid != id) { + self.flush_fid_word_count(&mut buffer)?; + } + + self.fid_word_count + .entry(field_id) + .and_modify(|(_current_count, new_count)| *new_count += 1) + .or_insert((0, 1)); + self.current_docid = Some(docid); + + Ok(()) + } + + fn insert_del_u32( + &mut self, + field_id: FieldId, + position: u16, + word: &str, + exact: bool, + docid: u32, + bump: &Bump, + ) -> Result<()> { + let word_bytes = word.as_bytes(); + if exact { + self.exact_word_docids.insert_del_u32(word_bytes, docid)?; + } else { + self.word_docids.insert_del_u32(word_bytes, docid)?; + } + + let buffer_size = word_bytes.len() + 1 + size_of::(); + let mut buffer = BumpVec::with_capacity_in(buffer_size, bump); + + buffer.clear(); + buffer.extend_from_slice(word_bytes); + buffer.push(0); + buffer.extend_from_slice(&field_id.to_be_bytes()); + self.word_fid_docids.insert_del_u32(&buffer, docid)?; + + let position = bucketed_position(position); + buffer.clear(); + buffer.extend_from_slice(word_bytes); + buffer.push(0); + buffer.extend_from_slice(&position.to_be_bytes()); + self.word_position_docids.insert_del_u32(&buffer, docid)?; + + if self.current_docid.map_or(false, |id| docid != id) { + self.flush_fid_word_count(&mut buffer)?; + } + + self.fid_word_count + .entry(field_id) + .and_modify(|(current_count, _new_count)| *current_count += 1) + .or_insert((1, 0)); + + self.current_docid = Some(docid); + + Ok(()) + } + + fn flush_fid_word_count(&mut self, buffer: &mut BumpVec) -> Result<()> { + for (fid, (current_count, new_count)) in self.fid_word_count.drain() { + if current_count != new_count { + if current_count <= MAX_COUNTED_WORDS { + buffer.clear(); + buffer.extend_from_slice(&fid.to_be_bytes()); + buffer.push(current_count as u8); + self.fid_word_count_docids + .insert_del_u32(buffer, self.current_docid.unwrap())?; + } + if new_count <= MAX_COUNTED_WORDS { + buffer.clear(); + buffer.extend_from_slice(&fid.to_be_bytes()); + buffer.push(new_count as u8); + self.fid_word_count_docids + .insert_add_u32(buffer, self.current_docid.unwrap())?; + } + } + } + + Ok(()) + } +} + +pub struct WordDocidsCaches<'extractor> { + pub word_docids: Vec>, + pub word_fid_docids: Vec>, + pub exact_word_docids: Vec>, + pub word_position_docids: Vec>, + pub fid_word_count_docids: Vec>, +} + +impl<'extractor> WordDocidsCaches<'extractor> { + fn new() -> Self { + Self { + word_docids: Vec::new(), + word_fid_docids: Vec::new(), + exact_word_docids: Vec::new(), + word_position_docids: Vec::new(), + fid_word_count_docids: Vec::new(), + } + } + + fn push(&mut self, other: WordDocidsBalancedCaches<'extractor>) -> Result<()> { + let WordDocidsBalancedCaches { + word_docids, + word_fid_docids, + exact_word_docids, + word_position_docids, + fid_word_count_docids, + fid_word_count: _, + current_docid: _, + } = other; + + self.word_docids.push(word_docids); + self.word_fid_docids.push(word_fid_docids); + self.exact_word_docids.push(exact_word_docids); + self.word_position_docids.push(word_position_docids); + self.fid_word_count_docids.push(fid_word_count_docids); + + Ok(()) + } +} + +pub struct WordDocidsExtractorData<'a> { + tokenizer: &'a 
DocumentTokenizer<'a>, + grenad_parameters: GrenadParameters, + buckets: usize, +} + +impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> { + type Data = RefCell>>; + + fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { + Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in( + self.buckets, + self.grenad_parameters.max_memory, + extractor_alloc, + )))) + } + + fn process( + &self, + change: DocumentChange, + context: &DocumentChangeContext, + ) -> Result<()> { + WordDocidsExtractors::extract_document_change(context, self.tokenizer, change) + } +} + +pub struct WordDocidsExtractors; + +impl WordDocidsExtractors { + pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>>( + grenad_parameters: GrenadParameters, + document_changes: &DC, + indexing_context: IndexingContext<'fid, 'indexer, 'index>, + extractor_allocs: &'extractor mut ThreadLocal>, + ) -> Result> { + let index = indexing_context.index; + let rtxn = index.read_txn()?; + + let stop_words = index.stop_words(&rtxn)?; + let allowed_separators = index.allowed_separators(&rtxn)?; + let allowed_separators: Option> = + allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect()); + let dictionary = index.dictionary(&rtxn)?; + let dictionary: Option> = + dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); + let builder = tokenizer_builder( + stop_words.as_ref(), + allowed_separators.as_deref(), + dictionary.as_deref(), + ); + let tokenizer = builder.into_tokenizer(); + + let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?; + let attributes_to_skip = Self::attributes_to_skip(&rtxn, index)?; + let localized_attributes_rules = + index.localized_attributes_rules(&rtxn)?.unwrap_or_default(); + + let document_tokenizer = DocumentTokenizer { + tokenizer: &tokenizer, + attribute_to_extract: attributes_to_extract.as_deref(), + attribute_to_skip: attributes_to_skip.as_slice(), + localized_attributes_rules: &localized_attributes_rules, + max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE, + }; + + let datastore = ThreadLocal::new(); + + { + let span = + tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction"); + let _entered = span.enter(); + + let extractor = WordDocidsExtractorData { + tokenizer: &document_tokenizer, + grenad_parameters, + buckets: rayon::current_num_threads(), + }; + + for_each_document_change( + document_changes, + &extractor, + indexing_context, + extractor_allocs, + &datastore, + )?; + } + + let mut merger = WordDocidsCaches::new(); + for cache in datastore.into_iter().flat_map(RefCell::into_inner) { + merger.push(cache)?; + } + + Ok(merger) + } + + fn extract_document_change( + context: &DocumentChangeContext>>, + document_tokenizer: &DocumentTokenizer, + document_change: DocumentChange, + ) -> Result<()> { + let index = &context.index; + let rtxn = &context.txn; + let mut cached_sorter_ref = context.data.borrow_mut_or_yield(); + let cached_sorter = cached_sorter_ref.as_mut().unwrap(); + let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); + let new_fields_ids_map = new_fields_ids_map.deref_mut(); + let doc_alloc = &context.doc_alloc; + + let exact_attributes = index.exact_attributes(rtxn)?; + let is_exact_attribute = + |fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr)); + match document_change { + DocumentChange::Deletion(inner) => { + let mut token_fn = |fname: &str, fid, pos, word: &str| { + cached_sorter.insert_del_u32( 
+ fid, + pos, + word, + is_exact_attribute(fname), + inner.docid(), + doc_alloc, + ) + }; + document_tokenizer.tokenize_document( + inner.current(rtxn, index, context.db_fields_ids_map)?, + new_fields_ids_map, + &mut token_fn, + )?; + } + DocumentChange::Update(inner) => { + let mut token_fn = |fname: &str, fid, pos, word: &str| { + cached_sorter.insert_del_u32( + fid, + pos, + word, + is_exact_attribute(fname), + inner.docid(), + doc_alloc, + ) + }; + document_tokenizer.tokenize_document( + inner.current(rtxn, index, context.db_fields_ids_map)?, + new_fields_ids_map, + &mut token_fn, + )?; + + let mut token_fn = |fname: &str, fid, pos, word: &str| { + cached_sorter.insert_add_u32( + fid, + pos, + word, + is_exact_attribute(fname), + inner.docid(), + doc_alloc, + ) + }; + document_tokenizer.tokenize_document( + inner.new(rtxn, index, context.db_fields_ids_map)?, + new_fields_ids_map, + &mut token_fn, + )?; + } + DocumentChange::Insertion(inner) => { + let mut token_fn = |fname: &str, fid, pos, word: &str| { + cached_sorter.insert_add_u32( + fid, + pos, + word, + is_exact_attribute(fname), + inner.docid(), + doc_alloc, + ) + }; + document_tokenizer.tokenize_document( + inner.new(), + new_fields_ids_map, + &mut token_fn, + )?; + } + } + + let buffer_size = size_of::(); + let mut buffer = BumpVec::with_capacity_in(buffer_size, &context.doc_alloc); + cached_sorter.flush_fid_word_count(&mut buffer) + } + + fn attributes_to_extract<'a>( + rtxn: &'a RoTxn, + index: &'a Index, + ) -> Result>> { + index.user_defined_searchable_fields(rtxn).map_err(Into::into) + } + + fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result> { + Ok(vec![]) + } +} diff --git a/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs similarity index 92% rename from milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs rename to crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs index 1bd3aee36..796b8c943 100644 --- a/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs @@ -8,13 +8,13 @@ use super::tokenize_document::DocumentTokenizer; use super::SearchableExtractor; use crate::proximity::{index_proximity, MAX_DISTANCE}; use crate::update::new::document::Document; -use crate::update::new::extract::cache::CboCachedSorter; -use crate::update::new::indexer::document_changes::{DocumentChangeContext, FullySend, RefCellExt}; +use crate::update::new::extract::cache::BalancedCaches; +use crate::update::new::indexer::document_changes::{DocumentChangeContext, RefCellExt}; use crate::update::new::DocumentChange; -use crate::update::MergeDeladdCboRoaringBitmaps; use crate::{FieldId, GlobalFieldsIdsMap, Index, Result}; pub struct WordPairProximityDocidsExtractor; + impl SearchableExtractor for WordPairProximityDocidsExtractor { fn attributes_to_extract<'a>( rtxn: &'a RoTxn, @@ -28,11 +28,10 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { } // This method is reimplemented to count the number of words in the document in each field - // and to store the docids of the documents that have a number of words in a given field equal to or under than MAX_COUNTED_WORDS. + // and to store the docids of the documents that have a number of words in a given field + // equal to or under than MAX_COUNTED_WORDS. 
fn extract_document_change( - context: &DocumentChangeContext< - FullySend>>, - >, + context: &DocumentChangeContext>, document_tokenizer: &DocumentTokenizer, document_change: DocumentChange, ) -> Result<()> { @@ -48,7 +47,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); let new_fields_ids_map = &mut *new_fields_ids_map; - let mut cached_sorter = context.data.0.borrow_mut_or_yield(); + let mut cached_sorter = context.data.borrow_mut_or_yield(); let cached_sorter = &mut *cached_sorter; // is a vecdequeue, and will be smol, so can stay on the heap for now @@ -139,7 +138,7 @@ fn build_key<'a>( fn word_positions_into_word_pair_proximity( word_positions: &mut VecDeque<(Rc, u16)>, word_pair_proximity: &mut impl FnMut((Rc, Rc), u8), -) -> Result<()> { +) { let (head_word, head_position) = word_positions.pop_front().unwrap(); for (word, position) in word_positions.iter() { let prox = index_proximity(head_position as u32, *position as u32) as u8; @@ -147,7 +146,6 @@ fn word_positions_into_word_pair_proximity( word_pair_proximity((head_word.clone(), word.clone()), prox); } } - Ok(()) } fn process_document_tokens<'doc>( @@ -163,7 +161,7 @@ fn process_document_tokens<'doc>( .front() .map_or(false, |(_w, p)| index_proximity(*p as u32, pos as u32) >= MAX_DISTANCE) { - word_positions_into_word_pair_proximity(word_positions, word_pair_proximity)?; + word_positions_into_word_pair_proximity(word_positions, word_pair_proximity); } // insert the new word. @@ -173,7 +171,7 @@ fn process_document_tokens<'doc>( document_tokenizer.tokenize_document(document, fields_ids_map, &mut token_fn)?; while !word_positions.is_empty() { - word_positions_into_word_pair_proximity(word_positions, word_pair_proximity)?; + word_positions_into_word_pair_proximity(word_positions, word_pair_proximity); } Ok(()) diff --git a/milli/src/update/new/extract/searchable/mod.rs b/crates/milli/src/update/new/extract/searchable/mod.rs similarity index 61% rename from milli/src/update/new/extract/searchable/mod.rs rename to crates/milli/src/update/new/extract/searchable/mod.rs index e16e83167..b75a01cd2 100644 --- a/milli/src/update/new/extract/searchable/mod.rs +++ b/crates/milli/src/update/new/extract/searchable/mod.rs @@ -3,56 +3,42 @@ mod extract_word_pair_proximity_docids; mod tokenize_document; use std::cell::RefCell; -use std::fs::File; use std::marker::PhantomData; use bumpalo::Bump; -pub use extract_word_docids::{WordDocidsExtractors, WordDocidsMergers}; +pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors}; pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor; -use grenad::Merger; use heed::RoTxn; -use rayon::iter::{ParallelBridge, ParallelIterator}; use tokenize_document::{tokenizer_builder, DocumentTokenizer}; -use super::cache::CboCachedSorter; +use super::cache::BalancedCaches; use super::DocidsExtractor; use crate::update::new::indexer::document_changes::{ extract, DocumentChangeContext, DocumentChanges, Extractor, FullySend, IndexingContext, Progress, ThreadLocal, }; use crate::update::new::DocumentChange; -use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; +use crate::update::GrenadParameters; use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE}; -pub struct SearchableExtractorData<'extractor, EX: SearchableExtractor> { - tokenizer: &'extractor DocumentTokenizer<'extractor>, +pub struct SearchableExtractorData<'a, EX: SearchableExtractor> { + tokenizer: 
&'a DocumentTokenizer<'a>, grenad_parameters: GrenadParameters, - max_memory: Option, + buckets: usize, _ex: PhantomData, } -impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> - for SearchableExtractorData<'extractor, EX> +impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> + for SearchableExtractorData<'a, EX> { - type Data = FullySend>>; + type Data = RefCell>; - fn init_data( - &self, - _extractor_alloc: raw_collections::alloc::RefBump<'extractor>, - ) -> Result { - Ok(FullySend(RefCell::new(CboCachedSorter::new( - // TODO use a better value - 1_000_000.try_into().unwrap(), - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - self.grenad_parameters.chunk_compression_type, - self.grenad_parameters.chunk_compression_level, - self.grenad_parameters.max_nb_chunks, - self.max_memory, - false, - ), - )))) + fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { + Ok(RefCell::new(BalancedCaches::new_in( + self.buckets, + self.grenad_parameters.max_memory, + extractor_alloc, + ))) } fn process<'doc>( @@ -69,21 +55,19 @@ impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> } pub trait SearchableExtractor: Sized + Sync { - fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>, MSP, SP>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( grenad_parameters: GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, - extractor_allocs: &mut ThreadLocal>>, + extractor_allocs: &'extractor mut ThreadLocal>, finished_steps: u16, total_steps: u16, step_name: &'static str, - ) -> Result> + ) -> Result>> where MSP: Fn() -> bool + Sync, SP: Fn(Progress) + Sync, { - let max_memory = grenad_parameters.max_memory_by_thread(); - let rtxn = indexing_context.index.read_txn()?; let stop_words = indexing_context.index.stop_words(&rtxn)?; let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?; @@ -115,7 +99,7 @@ pub trait SearchableExtractor: Sized + Sync { let extractor_data: SearchableExtractorData = SearchableExtractorData { tokenizer: &document_tokenizer, grenad_parameters, - max_memory, + buckets: rayon::current_num_threads(), _ex: PhantomData, }; @@ -136,37 +120,12 @@ pub trait SearchableExtractor: Sized + Sync { step_name, )?; } - { - let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps); - let span = - tracing::trace_span!(target: "indexing::documents::extract", "merger_building"); - let _entered = span.enter(); - let readers: Vec<_> = datastore - .into_iter() - .par_bridge() - .map(|cache_entry| { - let cached_sorter: FullySend< - RefCell>, - > = cache_entry; - let cached_sorter = cached_sorter.0.into_inner(); - let sorter = cached_sorter.into_sorter()?; - sorter.into_reader_cursors() - }) - .collect(); - - for reader in readers { - builder.extend(reader?); - } - - Ok(builder.build()) - } + Ok(datastore.into_iter().map(RefCell::into_inner).collect()) } fn extract_document_change( - context: &DocumentChangeContext< - FullySend>>, - >, + context: &DocumentChangeContext>, document_tokenizer: &DocumentTokenizer, document_change: DocumentChange, ) -> Result<()>; @@ -178,15 +137,15 @@ pub trait SearchableExtractor: Sized + Sync { } impl DocidsExtractor for T { - fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>, MSP, SP>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( grenad_parameters: 
GrenadParameters, document_changes: &DC, indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, - extractor_allocs: &mut ThreadLocal>>, + extractor_allocs: &'extractor mut ThreadLocal>, finished_steps: u16, total_steps: u16, step_name: &'static str, - ) -> Result> + ) -> Result>> where MSP: Fn() -> bool + Sync, SP: Fn(Progress) + Sync, diff --git a/milli/src/update/new/extract/searchable/tokenize_document.rs b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs similarity index 100% rename from milli/src/update/new/extract/searchable/tokenize_document.rs rename to crates/milli/src/update/new/extract/searchable/tokenize_document.rs diff --git a/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs similarity index 100% rename from milli/src/update/new/extract/vectors/mod.rs rename to crates/milli/src/update/new/extract/vectors/mod.rs diff --git a/crates/milli/src/update/new/facet_search_builder.rs b/crates/milli/src/update/new/facet_search_builder.rs new file mode 100644 index 000000000..839120540 --- /dev/null +++ b/crates/milli/src/update/new/facet_search_builder.rs @@ -0,0 +1,264 @@ +use std::collections::{BTreeSet, HashMap}; + +use charabia::normalizer::NormalizerOption; +use charabia::{Language, Normalize, StrDetection, Token}; +use grenad::Sorter; +use heed::types::{Bytes, SerdeJson}; +use heed::{BytesDecode, BytesEncode, RoTxn, RwTxn}; + +use super::extract::FacetKind; +use super::fst_merger_builder::FstMergerBuilder; +use super::KvReaderDelAdd; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; +use crate::heed_codec::StrRefCodec; +use crate::update::del_add::{DelAdd, KvWriterDelAdd}; +use crate::update::{create_sorter, MergeDeladdBtreesetString}; +use crate::{ + BEU16StrCodec, FieldId, GlobalFieldsIdsMap, Index, LocalizedAttributesRule, Result, + MAX_FACET_VALUE_LENGTH, +}; + +pub struct FacetSearchBuilder<'indexer> { + registered_facets: HashMap, + normalized_facet_string_docids_sorter: Sorter, + global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, + localized_attributes_rules: Vec, + // Buffered data below + buffer: Vec, + localized_field_ids: HashMap>>, +} + +impl<'indexer> FacetSearchBuilder<'indexer> { + pub fn new( + global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, + localized_attributes_rules: Vec, + ) -> Self { + let registered_facets = HashMap::new(); + let normalized_facet_string_docids_sorter = create_sorter( + grenad::SortAlgorithm::Stable, + MergeDeladdBtreesetString, + grenad::CompressionType::None, + None, + None, + Some(0), + true, + ); + + Self { + registered_facets, + normalized_facet_string_docids_sorter, + buffer: Vec::new(), + global_fields_ids_map, + localized_attributes_rules, + localized_field_ids: HashMap::new(), + } + } + + fn extract_key_data<'k>(&self, key: &'k [u8]) -> Result>> { + match FacetKind::from(key[0]) { + // Only strings are searchable + FacetKind::String => Ok(Some( + FacetGroupKeyCodec::::bytes_decode(&key[1..]) + .map_err(heed::Error::Encoding)?, + )), + _ => Ok(None), + } + } + + pub fn register_from_key(&mut self, deladd: DelAdd, facet_key: &[u8]) -> Result<()> { + let Some(FacetGroupKey { field_id, level: _level, left_bound }) = + self.extract_key_data(facet_key)? 
+ else { + return Ok(()); + }; + + if deladd == DelAdd::Addition { + self.registered_facets.entry(field_id).and_modify(|count| *count += 1).or_insert(1); + } + + let locales = self.locales(field_id); + let hyper_normalized_value = normalize_facet_string(left_bound, locales); + + let set = BTreeSet::from_iter(std::iter::once(left_bound)); + + // as the facet string is the same, we can put the deletion and addition in the same obkv. + self.buffer.clear(); + let mut obkv = KvWriterDelAdd::new(&mut self.buffer); + let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?; + obkv.insert(deladd, val)?; + obkv.finish()?; + + let key: (u16, &str) = (field_id, hyper_normalized_value.as_ref()); + let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?; + self.normalized_facet_string_docids_sorter.insert(key_bytes, &self.buffer)?; + + Ok(()) + } + + fn locales(&mut self, field_id: FieldId) -> Option<&[Language]> { + if !self.localized_field_ids.contains_key(&field_id) { + let Some(field_name) = self.global_fields_ids_map.name(field_id) else { + unreachable!("Field id {} not found in the global fields ids map", field_id); + }; + + let locales = self + .localized_attributes_rules + .iter() + .find(|rule| rule.match_str(field_name)) + .map(|rule| rule.locales.clone()); + + self.localized_field_ids.insert(field_id, locales); + } + + self.localized_field_ids.get(&field_id).unwrap().as_deref() + } + + #[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_fst")] + pub fn merge_and_write(self, index: &Index, wtxn: &mut RwTxn, rtxn: &RoTxn) -> Result<()> { + let reader = self.normalized_facet_string_docids_sorter.into_reader_cursors()?; + let mut builder = grenad::MergerBuilder::new(MergeDeladdBtreesetString); + builder.extend(reader); + + let database = index.facet_id_normalized_string_strings.remap_types::(); + + let mut merger_iter = builder.build().into_stream_merger_iter()?; + let mut current_field_id = None; + let mut fst; + let mut fst_merger_builder: Option = None; + while let Some((key, deladd)) = merger_iter.next()? { + let (field_id, normalized_facet_string) = + BEU16StrCodec::bytes_decode(key).map_err(heed::Error::Encoding)?; + + if current_field_id != Some(field_id) { + if let Some(fst_merger_builder) = fst_merger_builder { + let mmap = fst_merger_builder.build(&mut callback)?; + index + .facet_id_string_fst + .remap_data_type::() + .put(wtxn, &field_id, &mmap)?; + } + + fst = index.facet_id_string_fst.get(rtxn, &field_id)?; + fst_merger_builder = Some(FstMergerBuilder::new(fst.as_ref())?); + current_field_id = Some(field_id); + } + + let previous = database.get(rtxn, key)?; + let deladd: &KvReaderDelAdd = deladd.into(); + let del = deladd.get(DelAdd::Deletion); + let add = deladd.get(DelAdd::Addition); + + match merge_btreesets(previous, del, add)? 
{ + Operation::Write(value) => { + match fst_merger_builder.as_mut() { + Some(fst_merger_builder) => { + fst_merger_builder.register( + DelAdd::Addition, + normalized_facet_string.as_bytes(), + &mut callback, + )?; + } + None => unreachable!(), + } + let key = (field_id, normalized_facet_string); + let key_bytes = + BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?; + database.put(wtxn, &key_bytes, &value)?; + } + Operation::Delete => { + match fst_merger_builder.as_mut() { + Some(fst_merger_builder) => { + fst_merger_builder.register( + DelAdd::Deletion, + normalized_facet_string.as_bytes(), + &mut callback, + )?; + } + None => unreachable!(), + } + let key = (field_id, normalized_facet_string); + let key_bytes = + BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?; + database.delete(wtxn, &key_bytes)?; + } + Operation::Ignore => (), + } + } + + if let (Some(field_id), Some(fst_merger_builder)) = (current_field_id, fst_merger_builder) { + let mmap = fst_merger_builder.build(&mut callback)?; + index.facet_id_string_fst.remap_data_type::().put(wtxn, &field_id, &mmap)?; + } + + Ok(()) + } +} + +fn callback(_bytes: &[u8], _deladd: DelAdd, _is_modified: bool) -> Result<()> { + Ok(()) +} + +fn merge_btreesets( + current: Option<&[u8]>, + del: Option<&[u8]>, + add: Option<&[u8]>, +) -> Result { + let mut result: BTreeSet = match current { + Some(current) => SerdeJson::bytes_decode(current).map_err(heed::Error::Encoding)?, + None => BTreeSet::new(), + }; + if let Some(del) = del { + let del: BTreeSet = SerdeJson::bytes_decode(del).map_err(heed::Error::Encoding)?; + result = result.difference(&del).cloned().collect(); + } + if let Some(add) = add { + let add: BTreeSet = SerdeJson::bytes_decode(add).map_err(heed::Error::Encoding)?; + result.extend(add); + } + + /// TODO remove allocation + let result = SerdeJson::bytes_encode(&result).map_err(heed::Error::Encoding)?.into_owned(); + if Some(result.as_ref()) == current { + Ok(Operation::Ignore) + } else if result.is_empty() { + Ok(Operation::Delete) + } else { + Ok(Operation::Write(result)) + } +} + +/// Normalizes the facet string and truncates it to the max length. +fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String { + let options: NormalizerOption = NormalizerOption { lossy: true, ..Default::default() }; + let mut detection = StrDetection::new(facet_string, locales); + + let script = detection.script(); + // Detect the language of the facet string only if several locales are explicitly provided. 
+
+/// Normalizes the facet string and truncates it to the max length.
+fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
+    let options: NormalizerOption = NormalizerOption { lossy: true, ..Default::default() };
+    let mut detection = StrDetection::new(facet_string, locales);
+
+    let script = detection.script();
+    // Detect the language of the facet string only if several locales are explicitly provided.
+    let language = match locales {
+        Some(&[language]) => Some(language),
+        Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(),
+        _ => None,
+    };
+
+    let token = Token {
+        lemma: std::borrow::Cow::Borrowed(facet_string),
+        script,
+        language,
+        ..Default::default()
+    };
+
+    // truncate the facet string to the max length
+    token
+        .normalize(&options)
+        .lemma
+        .char_indices()
+        .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
+        .map(|(_, c)| c)
+        .collect()
+}
+
+enum Operation {
+    Write(Vec<u8>),
+    Delete,
+    Ignore,
+}
diff --git a/crates/milli/src/update/new/fst_merger_builder.rs b/crates/milli/src/update/new/fst_merger_builder.rs
new file mode 100644
index 000000000..9fd259ce6
--- /dev/null
+++ b/crates/milli/src/update/new/fst_merger_builder.rs
@@ -0,0 +1,155 @@
+use std::{fs::File, io::BufWriter};
+
+use fst::{Set, SetBuilder, Streamer};
+use memmap2::Mmap;
+use tempfile::tempfile;
+
+use crate::{update::del_add::DelAdd, InternalError, Result};
+
+pub struct FstMergerBuilder<'a> {
+    stream: Option<fst::set::Stream<'a>>,
+    fst_builder: SetBuilder<BufWriter<File>>,
+    last: Option<Vec<u8>>,
+    inserted_words: usize,
+}
+
+impl<'a> FstMergerBuilder<'a> {
+    pub fn new<D: AsRef<[u8]>>(fst: Option<&'a Set<D>>) -> Result<Self> {
+        Ok(Self {
+            stream: fst.map(|fst| fst.stream()),
+            fst_builder: SetBuilder::new(BufWriter::new(tempfile()?))?,
+            last: None,
+            inserted_words: 0,
+        })
+    }
+
+    pub fn register(
+        &mut self,
+        deladd: DelAdd,
+        right: &[u8],
+        insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
+    ) -> Result<()> {
+        if let Some(left) = self.last.take() {
+            let (left_inserted, right_inserted) =
+                self.compare_and_insert(deladd, left.as_slice(), right, insertion_callback)?;
+
+            // left was not inserted, so we keep it for the next iteration
+            if !left_inserted {
+                self.last = Some(left);
+            }
+
+            // right was inserted, so we can stop
+            if right_inserted {
+                return Ok(());
+            }
+        }
+
+        if let Some(mut stream) = self.stream.take() {
+            while let Some(left) = stream.next() {
+                let (left_inserted, right_inserted) =
+                    self.compare_and_insert(deladd, left, right, insertion_callback)?;
+
+                // left was not inserted, so we keep it for the next iteration
+                if !left_inserted {
+                    self.last = Some(left.to_vec());
+                }
+
+                // right was inserted, so we can stop
+                if right_inserted {
+                    self.stream = Some(stream);
+                    return Ok(());
+                }
+            }
+        }
+
+        // If we reach this point, it means that the stream is empty
+        // and we need to insert the incoming word
+        self.insert(right, deladd, true, insertion_callback)?;
+
+        Ok(())
+    }
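The `register` method above is a classic two-way sorted merge: the existing FST stream and the incoming DelAdd'd words advance in lockstep, and a deletion is expressed by simply not emitting the word. A self-contained sketch of that merge over plain string slices, with no `fst` crate involved and illustrative names throughout:

```rust
#[derive(Clone, Copy, PartialEq)]
enum DelAdd {
    Deletion,
    Addition,
}

/// Merge an already-sorted word list with a sorted batch of DelAdd
/// operations, keeping the output sorted; deleted words are dropped
/// by never being emitted, mirroring the builder above.
fn merge_sorted(existing: &[&str], ops: &[(&str, DelAdd)]) -> Vec<String> {
    let mut out = Vec::new();
    let (mut i, mut j) = (0, 0);
    while i < existing.len() || j < ops.len() {
        match (existing.get(i), ops.get(j)) {
            (Some(&word), Some(&(op_word, op))) => match word.cmp(op_word) {
                std::cmp::Ordering::Less => {
                    out.push(word.to_string()); // unchanged word from the old fst
                    i += 1;
                }
                std::cmp::Ordering::Equal => {
                    if op == DelAdd::Addition {
                        out.push(word.to_string()); // re-inserted
                    } // Deletion: skip it
                    i += 1;
                    j += 1;
                }
                std::cmp::Ordering::Greater => {
                    if op == DelAdd::Addition {
                        out.push(op_word.to_string()); // brand-new word
                    }
                    j += 1;
                }
            },
            (Some(&word), None) => {
                out.push(word.to_string()); // drain the remaining old words
                i += 1;
            }
            (None, Some(&(op_word, op))) => {
                if op == DelAdd::Addition {
                    out.push(op_word.to_string());
                }
                j += 1;
            }
            (None, None) => unreachable!(),
        }
    }
    out
}

fn main() {
    let merged = merge_sorted(
        &["apple", "kiwi", "pear"],
        &[("banana", DelAdd::Addition), ("kiwi", DelAdd::Deletion)],
    );
    assert_eq!(merged, ["apple", "banana", "pear"]);
}
```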
+    fn compare_and_insert(
+        &mut self,
+        deladd: DelAdd,
+        left: &[u8],
+        right: &[u8],
+        insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
+    ) -> Result<(bool, bool)> {
+        let mut left_inserted = false;
+        let mut right_inserted = false;
+        match left.cmp(right) {
+            std::cmp::Ordering::Less => {
+                // We need to insert the last word from the current fst
+                self.insert(left, DelAdd::Addition, false, insertion_callback)?;
+
+                left_inserted = true;
+            }
+            std::cmp::Ordering::Equal => {
+                self.insert(right, deladd, true, insertion_callback)?;
+
+                left_inserted = true;
+                right_inserted = true;
+            }
+            std::cmp::Ordering::Greater => {
+                self.insert(right, deladd, true, insertion_callback)?;
+
+                right_inserted = true;
+            }
+        }
+
+        Ok((left_inserted, right_inserted))
+    }
+
+    fn insert(
+        &mut self,
+        bytes: &[u8],
+        deladd: DelAdd,
+        is_modified: bool,
+        insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
+    ) -> Result<()> {
+        // Addition: We insert the word
+        // Deletion: We delete the word by not inserting it
+        if deladd == DelAdd::Addition {
+            self.inserted_words += 1;
+            self.fst_builder.insert(bytes)?;
+        }
+
+        insertion_callback(bytes, deladd, is_modified)?;
+
+        Ok(())
+    }
+
+    fn drain_stream(
+        &mut self,
+        insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
+    ) -> Result<()> {
+        if let Some(last) = self.last.take() {
+            self.insert(last.as_slice(), DelAdd::Addition, false, insertion_callback)?;
+        }
+
+        if let Some(mut stream) = self.stream.take() {
+            while let Some(current) = stream.next() {
+                self.insert(current, DelAdd::Addition, false, insertion_callback)?;
+            }
+        }
+
+        Ok(())
+    }
+
+    pub fn build(
+        mut self,
+        insertion_callback: &mut impl FnMut(&[u8], DelAdd, bool) -> Result<()>,
+    ) -> Result<Mmap> {
+        self.drain_stream(insertion_callback)?;
+
+        let fst_file = self
+            .fst_builder
+            .into_inner()?
+            .into_inner()
+            .map_err(|_| InternalError::IndexingMergingKeys { process: "building-fst" })?;
+        let fst_mmap = unsafe { Mmap::map(&fst_file)? };
+
+        Ok(fst_mmap)
+    }
+}
diff --git a/milli/src/update/new/indexer/de.rs b/crates/milli/src/update/new/indexer/de.rs
similarity index 100%
rename from milli/src/update/new/indexer/de.rs
rename to crates/milli/src/update/new/indexer/de.rs
diff --git a/milli/src/update/new/indexer/document_changes.rs b/crates/milli/src/update/new/indexer/document_changes.rs
similarity index 95%
rename from milli/src/update/new/indexer/document_changes.rs
rename to crates/milli/src/update/new/indexer/document_changes.rs
index aad190269..0a9155b1c 100644
--- a/milli/src/update/new/indexer/document_changes.rs
+++ b/crates/milli/src/update/new/indexer/document_changes.rs
@@ -3,7 +3,6 @@ use std::sync::{Arc, RwLock};
 
 use bumpalo::Bump;
 use heed::RoTxn;
-use raw_collections::alloc::RefBump;
 use rayon::iter::IndexedParallelIterator;
 
 use super::super::document_change::DocumentChange;
@@ -105,6 +104,10 @@ pub struct FullySend<T>(pub T);
 // SAFETY: a type **fully** send is always mostly send as well.
 unsafe impl<T> MostlySend for FullySend<T> where T: Send {}
 
+unsafe impl<T> MostlySend for RefCell<T> where T: MostlySend {}
+
+unsafe impl<T> MostlySend for Option<T> where T: MostlySend {}
+
 impl<T> FullySend<T> {
     pub fn into(self) -> T {
         self.0
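The hunks that follow replace the `RefBump<'extractor>` indirection with a plain `&'extractor Bump` stored in a `ThreadLocal`, which is reset between indexing rounds instead of dropped. A minimal sketch of that per-thread arena-reuse pattern, assuming the `bumpalo` and `thread_local` crates (both already dependencies in this diff) and a plain loop standing in for rayon:

```rust
use bumpalo::Bump;
use thread_local::ThreadLocal;

fn run_round(allocs: &ThreadLocal<Bump>, inputs: &[u32]) -> usize {
    // In the real code this loop is driven by rayon workers; a plain
    // loop keeps the sketch self-contained.
    inputs
        .iter()
        .map(|&n| {
            let alloc = allocs.get_or(Bump::new);
            // Arena-allocated scratch data, freed wholesale by `reset`.
            let scratch = alloc.alloc_slice_fill_copy(n as usize, 0u8);
            scratch.len()
        })
        .sum()
}

fn main() {
    let mut allocs: ThreadLocal<Bump> = ThreadLocal::new();
    let total = run_round(&allocs, &[10, 20, 30]);
    assert_eq!(total, 60);

    // Between rounds, reset every thread-local arena instead of
    // reallocating it, like the `extractor_allocs` loop below does.
    for alloc in allocs.iter_mut() {
        alloc.reset();
    }
}
```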
@@ -257,7 +260,7 @@ pub struct DocumentChangeContext<
     pub doc_alloc: Bump,
 
     /// Data allocated in this allocator is not cleared between each call to `process`, unless the data spills.
-    pub extractor_alloc: RefBump<'extractor>,
+    pub extractor_alloc: &'extractor Bump,
 
     /// Pool of doc allocators, used to retrieve the doc allocator we provided for the documents
     doc_allocs: &'doc ThreadLocal<FullySend<Cell<Bump>>>,
@@ -280,14 +283,14 @@ impl<
         index: &'indexer Index,
         db_fields_ids_map: &'indexer FieldsIdsMap,
         new_fields_ids_map: &'fid RwLock<FieldsIdsMap>,
-        extractor_allocs: &'extractor ThreadLocal<FullySend<RefCell<Bump>>>,
+        extractor_allocs: &'extractor ThreadLocal<FullySend<Bump>>,
         doc_allocs: &'doc ThreadLocal<FullySend<Cell<Bump>>>,
         datastore: &'data ThreadLocal<T>,
         fields_ids_map_store: &'doc ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
         init_data: F,
     ) -> Result<Self>
     where
-        F: FnOnce(RefBump<'extractor>) -> Result<T>,
+        F: FnOnce(&'extractor Bump) -> Result<T>,
     {
         let doc_alloc =
             doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024 * 1024))));
@@ -298,9 +301,7 @@ impl<
         let fields_ids_map = &fields_ids_map.0;
 
         let extractor_alloc = extractor_allocs.get_or_default();
-        let extractor_alloc = RefBump::new(extractor_alloc.0.borrow_or_yield());
-
-        let data = datastore.get_or_try(|| init_data(RefBump::clone(&extractor_alloc)))?;
+        let data = datastore.get_or_try(move || init_data(&extractor_alloc.0))?;
 
         let txn = index.read_txn()?;
         Ok(DocumentChangeContext {
@@ -309,7 +310,7 @@ impl<
             db_fields_ids_map,
             new_fields_ids_map: fields_ids_map,
             doc_alloc,
-            extractor_alloc,
+            extractor_alloc: &extractor_alloc.0,
             data,
             doc_allocs,
         })
@@ -320,7 +321,7 @@ impl<
 pub trait Extractor<'extractor>: Sync {
     type Data: MostlySend;
 
-    fn init_data<'doc>(&'doc self, extractor_alloc: RefBump<'extractor>) -> Result<Self::Data>;
+    fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> Result<Self::Data>;
 
     fn process<'doc>(
         &'doc self,
@@ -438,7 +439,7 @@ pub fn extract<
         must_stop_processing,
         send_progress,
     }: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
-    extractor_allocs: &'extractor mut ThreadLocal<FullySend<RefCell<Bump>>>,
+    extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
     datastore: &'data ThreadLocal<EX::Data>,
     finished_steps: u16,
     total_steps: u16,
@@ -449,9 +450,11 @@ where
     MSP: Fn() -> bool + Sync,
     SP: Fn(Progress) + Sync,
 {
+    eprintln!("We are resetting the extractor allocators");
     // Clean up and reuse the extractor allocs
     for extractor_alloc in extractor_allocs.iter_mut() {
-        extractor_alloc.0.get_mut().reset();
+        eprintln!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());
+        extractor_alloc.0.reset();
     }
 
     let total_documents = document_changes.len();
diff --git a/milli/src/update/new/indexer/document_deletion.rs b/crates/milli/src/update/new/indexer/document_deletion.rs
similarity index 92%
rename from milli/src/update/new/indexer/document_deletion.rs
rename to crates/milli/src/update/new/indexer/document_deletion.rs
index 130560a44..d7648acd8 100644
--- a/milli/src/update/new/indexer/document_deletion.rs
+++ b/crates/milli/src/update/new/indexer/document_deletion.rs
@@ -88,7 +88,6 @@ mod test {
     use std::sync::RwLock;
 
     use bumpalo::Bump;
-    use raw_collections::alloc::RefBump;
 
     use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
     use crate::index::tests::TempIndex;
@@ -103,11 +102,7 @@ mod test {
     fn test_deletions() {
         struct DeletionWithData<'extractor> {
             deleted: RefCell<
-                hashbrown::HashSet<
-                    DocumentId,
-                    hashbrown::hash_map::DefaultHashBuilder,
-                    RefBump<'extractor>,
-                >,
+                hashbrown::HashSet<DocumentId, hashbrown::hash_map::DefaultHashBuilder, &'extractor Bump>,
             >,
         }
 
         impl<'extractor> Extractor<'extractor> for TrackDeletion<'extractor> {
             type Data = DeletionWithData<'extractor>;
 
-            fn init_data(
-                &self,
-                extractor_alloc: raw_collections::alloc::RefBump<'extractor>,
-            ) -> crate::Result<Self::Data> {
+            fn init_data(&self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
                 let deleted = RefCell::new(hashbrown::HashSet::new_in(extractor_alloc));
                 Ok(DeletionWithData { deleted })
             }
@@ -191,8 +183,7 @@ mod test {
                 println!("deleted by {index}: {:?}", data.deleted.borrow());
             }
             for alloc in extractor_allocs.iter_mut() {
-                let alloc = &mut alloc.0;
-                alloc.get_mut().reset();
+                alloc.0.reset();
             }
         }
     }
diff --git a/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs
similarity index 100%
rename from milli/src/update/new/indexer/document_operation.rs
rename to crates/milli/src/update/new/indexer/document_operation.rs
diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs
new file mode 100644
index 000000000..430313fbd
--- /dev/null
+++ b/crates/milli/src/update/new/indexer/mod.rs
@@ -0,0 +1,519 @@
+use std::cmp::Ordering;
+use std::sync::RwLock;
+use std::thread::{self, Builder};
+
+use big_s::S;
+use document_changes::{
+    for_each_document_change, DocumentChanges, FullySend, IndexingContext, ThreadLocal,
+};
+pub use document_deletion::DocumentDeletion;
+pub use document_operation::DocumentOperation;
+use heed::types::{Bytes, DecodeIgnore, Str};
+use heed::{RoTxn, RwTxn};
+use itertools::{merge_join_by, EitherOrBoth};
+pub use partial_dump::PartialDump;
+use rayon::ThreadPool;
+use time::OffsetDateTime;
+pub use update_by_function::UpdateByFunction;
+
+use super::channel::*;
+use super::extract::*;
+use super::facet_search_builder::FacetSearchBuilder;
+use super::merger::{FacetDatabases, FacetFieldIdsDelta};
+use super::word_fst_builder::PrefixDelta;
+use super::words_prefix_docids::{
+    compute_word_prefix_docids, compute_word_prefix_fid_docids, compute_word_prefix_position_docids,
+};
+use super::{StdResult, TopLevelMap};
+use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
+use crate::facet::FacetType;
+use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
+use crate::proximity::ProximityPrecision;
+use crate::update::del_add::DelAdd;
+use crate::update::new::word_fst_builder::{PrefixData, WordFstBuilder};
+use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids;
+use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids};
+use crate::update::settings::InnerIndexSettings;
+use crate::update::{FacetsUpdateBulk, GrenadParameters};
+use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError};
+
+pub mod de;
+pub mod document_changes;
+mod document_deletion;
+mod document_operation;
+mod partial_dump;
+mod update_by_function;
+
+/// This is the main function of this crate.
+///
+/// Give it the output of the [`Indexer::document_changes`] method and it will execute it in the [`rayon::ThreadPool`].
+/// +/// TODO return stats +pub fn index<'pl, 'indexer, 'index, DC>( + wtxn: &mut RwTxn, + index: &'index Index, + db_fields_ids_map: &'indexer FieldsIdsMap, + new_fields_ids_map: FieldsIdsMap, + new_primary_key: Option>, + pool: &ThreadPool, + document_changes: &DC, +) -> Result<()> +where + DC: DocumentChanges<'pl>, +{ + // TODO find a better channel limit + let (extractor_sender, writer_receiver) = extractor_writer_channel(10_000); + let new_fields_ids_map = RwLock::new(new_fields_ids_map); + + let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); + let fields_ids_map_store = ThreadLocal::with_capacity(pool.current_num_threads()); + let mut extractor_allocs = ThreadLocal::with_capacity(pool.current_num_threads()); + let doc_allocs = ThreadLocal::with_capacity(pool.current_num_threads()); + + let indexing_context = IndexingContext { + index, + db_fields_ids_map, + new_fields_ids_map: &new_fields_ids_map, + doc_allocs: &doc_allocs, + fields_ids_map_store: &fields_ids_map_store, + }; + + thread::scope(|s| -> crate::Result<_> { + let indexer_span = tracing::Span::current(); + // TODO manage the errors correctly + let extractor_handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { + pool.in_place_scope(|_s| { + let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract"); + let _entered = span.enter(); + + // document but we need to create a function that collects and compresses documents. + let rtxn = index.read_txn().unwrap(); + let document_sender = extractor_sender.documents(); + let document_extractor = DocumentsExtractor::new(&document_sender); + let datastore = ThreadLocal::with_capacity(pool.current_num_threads()); + for_each_document_change(document_changes, &document_extractor, indexing_context, &mut extractor_allocs, &datastore)?; + + let mut documents_ids = index.documents_ids(&rtxn)?; + let delta_documents_ids = datastore.into_iter().map(|FullySend(d)| d.into_inner()).reduce(DelAddRoaringBitmap::merge).unwrap_or_default(); + delta_documents_ids.apply_to(&mut documents_ids); + extractor_sender.send_documents_ids(documents_ids).unwrap(); + + // document_sender.finish().unwrap(); + + const TEN_GIB: usize = 10 * 1024 * 1024 * 1024; + let current_num_threads = rayon::current_num_threads(); + let max_memory = TEN_GIB / current_num_threads; + eprintln!("A maximum of {max_memory} bytes will be used for each of the {current_num_threads} threads"); + let grenad_parameters = GrenadParameters { + max_memory: Some(max_memory), + ..GrenadParameters::default() + }; + + let facet_field_ids_delta; + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted"); + let _entered = span.enter(); + facet_field_ids_delta = merge_and_send_facet_docids( + FacetedDocidsExtractor::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs)?, + FacetDatabases::new(index), + index, + extractor_sender.facet_docids(), + )?; + } + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); + let _entered = span.enter(); + + let WordDocidsCaches { + word_docids, + word_fid_docids, + exact_word_docids, + word_position_docids, + fid_word_count_docids, + } = WordDocidsExtractors::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs)?; + + // TODO Word Docids Merger + // extractor_sender.send_searchable::(word_docids).unwrap(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", 
"word_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_docids, + index.word_docids.remap_types(), + index, + extractor_sender.docids::(), + )?; + } + + // Word Fid Docids Merging + // extractor_sender.send_searchable::(word_fid_docids).unwrap(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_fid_docids, + index.word_fid_docids.remap_types(), + index, + extractor_sender.docids::() + )?; + } + + // Exact Word Docids Merging + // extractor_sender.send_searchable::(exact_word_docids).unwrap(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); + let _entered = span.enter(); + merge_and_send_docids( + exact_word_docids, + index.exact_word_docids.remap_types(), + index, + extractor_sender.docids::(), + )?; + } + + // Word Position Docids Merging + // extractor_sender.send_searchable::(word_position_docids).unwrap(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_position_docids, + index.word_position_docids.remap_types(), + index, + extractor_sender.docids::(), + )?; + } + + // Fid Word Count Docids Merging + // extractor_sender.send_searchable::(fid_word_count_docids).unwrap(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); + let _entered = span.enter(); + merge_and_send_docids( + fid_word_count_docids, + index.field_id_word_count_docids.remap_types(), + index, + extractor_sender.docids::(), + )?; + } + } + + // run the proximity extraction only if the precision is by word + // this works only if the settings didn't change during this transaction. + let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default(); + if proximity_precision == ProximityPrecision::ByWord { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); + let _entered = span.enter(); + let caches = ::run_extraction(grenad_parameters, document_changes, indexing_context, &mut extractor_allocs)?; + merge_and_send_docids( + caches, + index.word_pair_proximity_docids.remap_types(), + index, + extractor_sender.docids::(), + )?; + } + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH"); + let _entered = span.enter(); + } + + // TODO THIS IS TOO MUCH + // - [ ] Extract fieldid docid facet number + // - [ ] Extract fieldid docid facet string + // - [ ] Extract facetid string fst + // - [ ] Extract facetid normalized string strings + + // TODO Inverted Indexes again + // - [x] Extract fieldid facet isempty docids + // - [x] Extract fieldid facet isnull docids + // - [x] Extract fieldid facet exists docids + + // TODO This is the normal system + // - [x] Extract fieldid facet number docids + // - [x] Extract fieldid facet string docids + + // TODO use None when needed + Result::Ok(facet_field_ids_delta) + }) + })?; + + for operation in writer_receiver { + let database = operation.database(index); + match operation.entry() { + EntryOperation::Delete(e) => { + if !database.delete(wtxn, e.entry())? 
+                {
+                    unreachable!("We tried to delete an unknown key")
+                }
+            }
+                EntryOperation::Write(e) => database.put(wtxn, e.key(), e.value())?,
+            }
+        }
+
+        // TODO handle the panicking threads
+        let facet_field_ids_delta = extractor_handle.join().unwrap()?;
+
+        if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
+            compute_prefix_database(index, wtxn, prefix_delta)?;
+        }
+
+        compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
+
+        compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
+
+        Result::Ok(())
+    })?;
+
+    // Required to be able to call `into_inner` on the new_fields_ids_map.
+    drop(fields_ids_map_store);
+
+    let fields_ids_map = new_fields_ids_map.into_inner().unwrap();
+    index.put_fields_ids_map(wtxn, &fields_ids_map)?;
+
+    if let Some(new_primary_key) = new_primary_key {
+        index.put_primary_key(wtxn, new_primary_key.name())?;
+    }
+
+    // used to update the localized and weighted maps while sharing the update code with the settings pipeline.
+    let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn)?;
+    inner_index_settings.recompute_facets(wtxn, index)?;
+    inner_index_settings.recompute_searchables(wtxn, index)?;
+
+    index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
+
+    Ok(())
+}
+
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
+fn compute_prefix_database(
+    index: &Index,
+    wtxn: &mut RwTxn,
+    prefix_delta: PrefixDelta,
+) -> Result<()> {
+    eprintln!("prefix_delta: {:?}", &prefix_delta);
+    let PrefixDelta { modified, deleted } = prefix_delta;
+    // Compute word prefix docids
+    compute_word_prefix_docids(wtxn, index, &modified, &deleted)?;
+    // Compute exact word prefix docids
+    compute_exact_word_prefix_docids(wtxn, index, &modified, &deleted)?;
+    // Compute word prefix fid docids
+    compute_word_prefix_fid_docids(wtxn, index, &modified, &deleted)?;
+    // Compute word prefix position docids
+    compute_word_prefix_position_docids(wtxn, index, &modified, &deleted)
+}
+
+#[tracing::instrument(level = "trace", skip_all, target = "indexing")]
+fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result<Option<PrefixDelta>> {
+    let rtxn = index.read_txn()?;
+    let words_fst = index.words_fst(&rtxn)?;
+    let mut word_fst_builder = WordFstBuilder::new(&words_fst)?;
+    let prefix_settings = index.prefix_settings(&rtxn)?;
+    word_fst_builder.with_prefix_settings(prefix_settings);
+
+    let previous_words = index.word_docids.iter(&rtxn)?.remap_data_type::<Bytes>();
+    let current_words = index.word_docids.iter(wtxn)?.remap_data_type::<Bytes>();
+    for eob in merge_join_by(previous_words, current_words, |lhs, rhs| match (lhs, rhs) {
+        (Ok((l, _)), Ok((r, _))) => l.cmp(r),
+        (Err(_), _) | (_, Err(_)) => Ordering::Equal,
+    }) {
+        match eob {
+            EitherOrBoth::Both(lhs, rhs) => {
+                let (word, lhs_bytes) = lhs?;
+                let (_, rhs_bytes) = rhs?;
+                if lhs_bytes != rhs_bytes {
+                    word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?;
+                }
+            }
+            EitherOrBoth::Left(result) => {
+                let (word, _) = result?;
+                word_fst_builder.register_word(DelAdd::Deletion, word.as_ref())?;
+            }
+            EitherOrBoth::Right(result) => {
+                let (word, _) = result?;
+                word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?;
+            }
+        }
+    }
+
+    let span = tracing::trace_span!(target: "indexing::documents::merge", "words_fst");
+    let _entered = span.enter();
+
+    let (word_fst_mmap, prefix_data) = word_fst_builder.build(index, &rtxn)?;
+    // extractor_sender.main().write_words_fst(word_fst_mmap).unwrap();
+    index.main.remap_types::<Str, Bytes>().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?;
+
+    if let Some(PrefixData {
prefixes_fst_mmap, prefix_delta }) = prefix_data { + // extractor_sender.main().write_words_prefixes_fst(prefixes_fst_mmap).unwrap(); + index.main.remap_types::().put( + wtxn, + WORDS_PREFIXES_FST_KEY, + &prefixes_fst_mmap, + )?; + Ok(Some(prefix_delta)) + } else { + Ok(None) + } +} + +#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")] +fn compute_facet_search_database( + index: &Index, + wtxn: &mut RwTxn, + global_fields_ids_map: GlobalFieldsIdsMap, +) -> Result<()> { + let rtxn = index.read_txn()?; + let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?; + let mut facet_search_builder = FacetSearchBuilder::new( + global_fields_ids_map, + localized_attributes_rules.unwrap_or_default(), + ); + + let previous_facet_id_string_docids = index + .facet_id_string_docids + .iter(&rtxn)? + .remap_data_type::() + .filter(|r| r.as_ref().map_or(true, |(k, _)| k.level == 0)); + let current_facet_id_string_docids = index + .facet_id_string_docids + .iter(wtxn)? + .remap_data_type::() + .filter(|r| r.as_ref().map_or(true, |(k, _)| k.level == 0)); + for eob in merge_join_by( + previous_facet_id_string_docids, + current_facet_id_string_docids, + |lhs, rhs| match (lhs, rhs) { + (Ok((l, _)), Ok((r, _))) => l.cmp(r), + (Err(_), _) | (_, Err(_)) => Ordering::Equal, + }, + ) { + match eob { + EitherOrBoth::Both(lhs, rhs) => { + let (_, _) = lhs?; + let (_, _) = rhs?; + } + EitherOrBoth::Left(result) => { + let (key, _) = result?; + facet_search_builder + .register_from_key(DelAdd::Deletion, key.left_bound.as_ref())?; + } + EitherOrBoth::Right(result) => { + let (key, _) = result?; + facet_search_builder + .register_from_key(DelAdd::Addition, key.left_bound.as_ref())?; + } + } + } + + facet_search_builder.merge_and_write(index, wtxn, &rtxn) +} + +#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_field_ids")] +fn compute_facet_level_database( + index: &Index, + wtxn: &mut RwTxn, + facet_field_ids_delta: FacetFieldIdsDelta, +) -> Result<()> { + eprintln!("facet_field_ids_delta: {:?}", &facet_field_ids_delta); + if let Some(modified_facet_string_ids) = facet_field_ids_delta.modified_facet_string_ids() { + let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string"); + let _entered = span.enter(); + FacetsUpdateBulk::new_not_updating_level_0( + index, + modified_facet_string_ids, + FacetType::String, + ) + .execute(wtxn)?; + } + if let Some(modified_facet_number_ids) = facet_field_ids_delta.modified_facet_number_ids() { + let span = tracing::trace_span!(target: "indexing::facet_field_ids", "number"); + let _entered = span.enter(); + FacetsUpdateBulk::new_not_updating_level_0( + index, + modified_facet_number_ids, + FacetType::Number, + ) + .execute(wtxn)?; + } + + Ok(()) +} + +/// Returns the primary key that has already been set for this index or the +/// one we will guess by searching for the first key that contains "id" as a substring, +/// and whether the primary key changed +/// TODO move this elsewhere +pub fn retrieve_or_guess_primary_key<'a>( + rtxn: &'a RoTxn<'a>, + index: &Index, + new_fields_ids_map: &mut FieldsIdsMap, + primary_key_from_op: Option<&'a str>, + first_document: Option<&'a TopLevelMap<'a>>, +) -> Result, bool), UserError>> { + // make sure that we have a declared primary key, either fetching it from the index or attempting to guess it. + + // do we have an existing declared primary key? + let (primary_key, has_changed) = if let Some(primary_key_from_db) = index.primary_key(rtxn)? 
{ + // did we request a primary key in the operation? + match primary_key_from_op { + // we did, and it is different from the DB one + Some(primary_key_from_op) if primary_key_from_op != primary_key_from_db => { + // is the index empty? + if index.number_of_documents(rtxn)? == 0 { + // change primary key + (primary_key_from_op, true) + } else { + return Ok(Err(UserError::PrimaryKeyCannotBeChanged( + primary_key_from_db.to_string(), + ))); + } + } + _ => (primary_key_from_db, false), + } + } else { + // no primary key in the DB => let's set one + // did we request a primary key in the operation? + let primary_key = if let Some(primary_key_from_op) = primary_key_from_op { + // set primary key from operation + primary_key_from_op + } else { + // guess primary key + let first_document = match first_document { + Some(document) => document, + // previous indexer when no pk is set + we send an empty payload => index_primary_key_no_candidate_found + None => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), + }; + + let mut guesses: Vec<&str> = first_document + .keys() + .map(AsRef::as_ref) + .filter(|name| name.to_lowercase().ends_with(DEFAULT_PRIMARY_KEY)) + .collect(); + + // sort the keys in lexicographical order, so that fields are always in the same order. + guesses.sort_unstable(); + + match guesses.as_slice() { + [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)), + [name] => { + tracing::info!("Primary key was not specified in index. Inferred to '{name}'"); + *name + } + multiple => { + return Ok(Err(UserError::MultiplePrimaryKeyCandidatesFound { + candidates: multiple + .iter() + .map(|candidate| candidate.to_string()) + .collect(), + })) + } + } + }; + (primary_key, true) + }; + + match PrimaryKey::new_or_insert(primary_key, new_fields_ids_map) { + Ok(primary_key) => Ok(Ok((primary_key, has_changed))), + Err(err) => Ok(Err(err)), + } +} diff --git a/milli/src/update/new/indexer/partial_dump.rs b/crates/milli/src/update/new/indexer/partial_dump.rs similarity index 100% rename from milli/src/update/new/indexer/partial_dump.rs rename to crates/milli/src/update/new/indexer/partial_dump.rs diff --git a/milli/src/update/new/indexer/update_by_function.rs b/crates/milli/src/update/new/indexer/update_by_function.rs similarity index 100% rename from milli/src/update/new/indexer/update_by_function.rs rename to crates/milli/src/update/new/indexer/update_by_function.rs diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs new file mode 100644 index 000000000..b1c5c5fd9 --- /dev/null +++ b/crates/milli/src/update/new/merger.rs @@ -0,0 +1,255 @@ +use std::io::{self}; + +use bincode::ErrorKind; +use hashbrown::HashSet; +use heed::types::Bytes; +use heed::{Database, RoTxn}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use roaring::RoaringBitmap; + +use super::channel::*; +use super::extract::{ + merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind, +}; +use super::DocumentChange; +use crate::{CboRoaringBitmapCodec, Error, FieldId, GeoPoint, GlobalFieldsIdsMap, Index, Result}; + +pub struct GeoExtractor { + rtree: Option>, +} + +impl GeoExtractor { + pub fn new(rtxn: &RoTxn, index: &Index) -> Result> { + let is_sortable = index.sortable_fields(rtxn)?.contains("_geo"); + let is_filterable = index.filterable_fields(rtxn)?.contains("_geo"); + if is_sortable || is_filterable { + Ok(Some(GeoExtractor { rtree: index.geo_rtree(rtxn)? 
+        }))
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn manage_change(
+        &mut self,
+        fidmap: &mut GlobalFieldsIdsMap,
+        change: &DocumentChange,
+    ) -> Result<()> {
+        match change {
+            DocumentChange::Deletion(_) => todo!(),
+            DocumentChange::Update(_) => todo!(),
+            DocumentChange::Insertion(_) => todo!(),
+        }
+    }
+
+    pub fn serialize_rtree<W: io::Write>(self, writer: &mut W) -> Result<bool> {
+        match self.rtree {
+            Some(rtree) => {
+                // TODO What should I do?
+                bincode::serialize_into(writer, &rtree).map(|_| true).map_err(|e| match *e {
+                    ErrorKind::Io(e) => Error::IoError(e),
+                    ErrorKind::InvalidUtf8Encoding(_) => todo!(),
+                    ErrorKind::InvalidBoolEncoding(_) => todo!(),
+                    ErrorKind::InvalidCharEncoding => todo!(),
+                    ErrorKind::InvalidTagEncoding(_) => todo!(),
+                    ErrorKind::DeserializeAnyNotSupported => todo!(),
+                    ErrorKind::SizeLimit => todo!(),
+                    ErrorKind::SequenceMustHaveLength => todo!(),
+                    ErrorKind::Custom(_) => todo!(),
+                })
+            }
+            None => Ok(false),
+        }
+    }
+}
+
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
+pub fn merge_and_send_docids<'extractor>(
+    mut caches: Vec<BalancedCaches<'extractor>>,
+    database: Database<Bytes, Bytes>,
+    index: &Index,
+    docids_sender: impl DocidsSender + Sync,
+) -> Result<()> {
+    transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| {
+        let rtxn = index.read_txn()?;
+        let mut buffer = Vec::new();
+        merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
+            let current = database.get(&rtxn, key)?;
+            match merge_cbo_bitmaps(current, del, add)? {
+                Operation::Write(bitmap) => {
+                    let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
+                    docids_sender.write(key, value).unwrap();
+                    Ok(())
+                }
+                Operation::Delete => {
+                    docids_sender.delete(key).unwrap();
+                    Ok(())
+                }
+                Operation::Ignore => Ok(()),
+            }
+        })
+    })
+}
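The facet variant just below follows rayon's map-reduce shape: each worker produces a `Result<FacetFieldIdsDelta>`, and the per-thread deltas are folded pairwise into one. A reduced model of that pattern, with a stand-in delta type instead of milli's:

```rust
use rayon::prelude::*;
use std::collections::HashSet;

#[derive(Default)]
struct Delta {
    modified_fields: HashSet<u16>,
}

impl Delta {
    fn merge(mut self, rhs: Delta) -> Delta {
        self.modified_fields.extend(rhs.modified_fields);
        self
    }
}

fn main() {
    // One cache of touched field ids per worker thread.
    let per_thread_caches: Vec<Vec<u16>> = vec![vec![1, 2], vec![2, 3], vec![7]];

    let delta = per_thread_caches
        .into_par_iter()
        .map(|cache| {
            // Each worker registers the field ids it touched.
            let mut delta = Delta::default();
            delta.modified_fields.extend(cache);
            Ok::<_, std::io::Error>(delta)
        })
        // Same shape as `.reduce(|| Ok(Default::default()), |l, r| Ok(l?.merge(r?)))`.
        .reduce(|| Ok(Delta::default()), |lhs, rhs| Ok(lhs?.merge(rhs?)))
        .unwrap();

    assert_eq!(delta.modified_fields, HashSet::from([1, 2, 3, 7]));
}
```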
+
+#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
+pub fn merge_and_send_facet_docids<'extractor>(
+    mut caches: Vec<BalancedCaches<'extractor>>,
+    database: FacetDatabases,
+    index: &Index,
+    docids_sender: impl DocidsSender + Sync,
+) -> Result<FacetFieldIdsDelta> {
+    transpose_and_freeze_caches(&mut caches)?
+        .into_par_iter()
+        .map(|frozen| {
+            let mut facet_field_ids_delta = FacetFieldIdsDelta::default();
+            let rtxn = index.read_txn()?;
+            let mut buffer = Vec::new();
+            merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
+                let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
+                match merge_cbo_bitmaps(current, del, add)? {
+                    Operation::Write(bitmap) => {
+                        facet_field_ids_delta.register_from_key(key);
+                        let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
+                        docids_sender.write(key, value).unwrap();
+                        Ok(())
+                    }
+                    Operation::Delete => {
+                        facet_field_ids_delta.register_from_key(key);
+                        docids_sender.delete(key).unwrap();
+                        Ok(())
+                    }
+                    Operation::Ignore => Ok(()),
+                }
+            })?;
+
+            Ok(facet_field_ids_delta)
+        })
+        .reduce(|| Ok(FacetFieldIdsDelta::default()), |lhs, rhs| Ok(lhs?.merge(rhs?)))
+}
+
+pub struct FacetDatabases<'a> {
+    index: &'a Index,
+}
+
+impl<'a> FacetDatabases<'a> {
+    pub fn new(index: &'a Index) -> Self {
+        Self { index }
+    }
+
+    fn get_cbo_roaring_bytes_value<'t>(
+        &self,
+        rtxn: &'t RoTxn<'_>,
+        key: &[u8],
+    ) -> heed::Result<Option<&'t [u8]>> {
+        let (facet_kind, key) = FacetKind::extract_from_key(key);
+
+        let value =
+            super::channel::Database::from(facet_kind).database(self.index).get(rtxn, key)?;
+        match facet_kind {
+            // skip level group size
+            FacetKind::String | FacetKind::Number => Ok(value.map(|v| &v[1..])),
+            _ => Ok(value),
+        }
+    }
+}
+
+#[derive(Debug, Default)]
+pub struct FacetFieldIdsDelta {
+    /// The field ids that have been modified
+    modified_facet_string_ids: HashSet<FieldId>,
+    modified_facet_number_ids: HashSet<FieldId>,
+}
+
+impl FacetFieldIdsDelta {
+    fn register_facet_string_id(&mut self, field_id: FieldId) {
+        self.modified_facet_string_ids.insert(field_id);
+    }
+
+    fn register_facet_number_id(&mut self, field_id: FieldId) {
+        self.modified_facet_number_ids.insert(field_id);
+    }
+
+    fn register_from_key(&mut self, key: &[u8]) {
+        let (facet_kind, field_id) = self.extract_key_data(key);
+        match facet_kind {
+            FacetKind::Number => self.register_facet_number_id(field_id),
+            FacetKind::String => self.register_facet_string_id(field_id),
+            _ => (),
+        }
+    }
+
+    fn extract_key_data(&self, key: &[u8]) -> (FacetKind, FieldId) {
+        let facet_kind = FacetKind::from(key[0]);
+        let field_id = FieldId::from_be_bytes([key[1], key[2]]);
+        (facet_kind, field_id)
+    }
+
+    pub fn modified_facet_string_ids(&self) -> Option<Vec<FieldId>> {
+        if self.modified_facet_string_ids.is_empty() {
+            None
+        } else {
+            Some(self.modified_facet_string_ids.iter().copied().collect())
+        }
+    }
+
+    pub fn modified_facet_number_ids(&self) -> Option<Vec<FieldId>> {
+        if self.modified_facet_number_ids.is_empty() {
+            None
+        } else {
+            Some(self.modified_facet_number_ids.iter().copied().collect())
+        }
+    }
+
+    pub fn merge(mut self, rhs: Self) -> Self {
+        let Self { modified_facet_number_ids, modified_facet_string_ids } = rhs;
+        modified_facet_number_ids.into_iter().for_each(|fid| {
+            self.modified_facet_number_ids.insert(fid);
+        });
+        modified_facet_string_ids.into_iter().for_each(|fid| {
+            self.modified_facet_string_ids.insert(fid);
+        });
+        self
+    }
+}
+
+enum Operation {
+    Write(RoaringBitmap),
+    Delete,
+    Ignore,
+}
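The `merge_cbo_bitmaps` function that follows applies a simple rule: deletions are subtracted from the current bitmap first, then additions are unioned in, and the outcome maps onto `Write`/`Delete`/`Ignore`. An illustrative check of that rule on plain `RoaringBitmap`s (roaring's in-memory API, without the Cbo on-disk codec used by the real code):

```rust
use roaring::RoaringBitmap;

fn main() {
    let current: RoaringBitmap = (0u32..10).collect();
    let del: RoaringBitmap = (0u32..5).collect();
    let add: RoaringBitmap = [42u32].into_iter().collect();

    // Deletions are applied before additions, so a docid that is both
    // deleted and re-added ends up present in the output.
    let output = (&current - &del) | &add;
    assert!(output.contains(42));
    assert!(!output.contains(0));
    assert!(output.contains(5));

    // An empty output maps to `Operation::Delete`, an unchanged one to
    // `Operation::Ignore`, and anything else to `Operation::Write`.
    let emptied = &current - &current;
    assert!(emptied.is_empty());
}
```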
+
+/// A function that merges the DelAdd CboRoaringBitmaps with the current bitmap.
+fn merge_cbo_bitmaps(
+    current: Option<&[u8]>,
+    del: Option<RoaringBitmap>,
+    add: Option<RoaringBitmap>,
+) -> Result<Operation> {
+    let current = current.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
+    match (current, del, add) {
+        (None, None, None) => Ok(Operation::Ignore), // but it's strange
+        (None, None, Some(add)) => Ok(Operation::Write(add)),
+        (None, Some(_del), None) => Ok(Operation::Ignore), // but it's strange
+        (None, Some(_del), Some(add)) => Ok(Operation::Write(add)),
+        (Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
+        (Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
+        (Some(current), Some(del), add) => {
+            let output = match add {
+                Some(add) => (&current - del) | add,
+                None => &current - del,
+            };
+            if output.is_empty() {
+                Ok(Operation::Delete)
+            } else if current == output {
+                Ok(Operation::Ignore)
+            } else {
+                Ok(Operation::Write(output))
+            }
+        }
+    }
+}
+
+/// TODO Return the slice directly from the serialize_into method
+fn cbo_bitmap_serialize_into_vec<'b>(bitmap: &RoaringBitmap, buffer: &'b mut Vec<u8>) -> &'b [u8] {
+    buffer.clear();
+    CboRoaringBitmapCodec::serialize_into(bitmap, buffer);
+    buffer.as_slice()
+}
diff --git a/milli/src/update/new/mod.rs b/crates/milli/src/update/new/mod.rs
similarity index 80%
rename from milli/src/update/new/mod.rs
rename to crates/milli/src/update/new/mod.rs
index 6b59b5b59..36a9a3f92 100644
--- a/milli/src/update/new/mod.rs
+++ b/crates/milli/src/update/new/mod.rs
@@ -1,4 +1,7 @@
 pub use document_change::{Deletion, DocumentChange, Insertion, Update};
+pub use merger::{
+    merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases, FacetFieldIdsDelta,
+};
 pub use top_level_map::{CowStr, TopLevelMap};
 
 use super::del_add::DelAdd;
@@ -8,6 +11,8 @@ mod channel;
 pub mod document;
 mod document_change;
 mod extract;
+mod facet_search_builder;
+mod fst_merger_builder;
 pub mod indexer;
 mod merger;
 mod parallel_iterator_ext;
diff --git a/crates/milli/src/update/new/parallel_iterator_ext.rs b/crates/milli/src/update/new/parallel_iterator_ext.rs
new file mode 100644
index 000000000..ff69d7acf
--- /dev/null
+++ b/crates/milli/src/update/new/parallel_iterator_ext.rs
@@ -0,0 +1,33 @@
+use std::sync::Arc;
+
+use rayon::iter::ParallelIterator;
+
+pub trait ParallelIteratorExt: ParallelIterator {
+    /// A method to run a closure on all the items and return an owned error.
+    ///
+    /// The init function is run only as necessary, which is basically once per thread.
+ fn try_arc_for_each_try_init(self, init: INIT, op: F) -> Result<(), E> + where + E: Send + Sync, + F: Fn(&mut T, Self::Item) -> Result<(), Arc> + Sync + Send + Clone, + INIT: Fn() -> Result + Sync + Send + Clone, + { + let result = self.try_for_each_init( + move || match init() { + Ok(t) => Ok(t), + Err(err) => Err(Arc::new(err)), + }, + move |result, item| match result { + Ok(t) => op(t, item), + Err(err) => Err(err.clone()), + }, + ); + + match result { + Ok(()) => Ok(()), + Err(err) => Err(Arc::into_inner(err).expect("the error must be only owned by us")), + } + } +} + +impl ParallelIteratorExt for T {} diff --git a/milli/src/update/new/top_level_map.rs b/crates/milli/src/update/new/top_level_map.rs similarity index 100% rename from milli/src/update/new/top_level_map.rs rename to crates/milli/src/update/new/top_level_map.rs diff --git a/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs similarity index 100% rename from milli/src/update/new/vector_document.rs rename to crates/milli/src/update/new/vector_document.rs diff --git a/milli/src/update/new/word_fst_builder.rs b/crates/milli/src/update/new/word_fst_builder.rs similarity index 62% rename from milli/src/update/new/word_fst_builder.rs rename to crates/milli/src/update/new/word_fst_builder.rs index 867d3e86d..834266045 100644 --- a/milli/src/update/new/word_fst_builder.rs +++ b/crates/milli/src/update/new/word_fst_builder.rs @@ -1,4 +1,4 @@ -use std::{fs::File, io::BufWriter}; +use std::io::BufWriter; use fst::{Set, SetBuilder, Streamer}; use memmap2::Mmap; @@ -7,23 +7,19 @@ use tempfile::tempfile; use crate::{index::PrefixSettings, update::del_add::DelAdd, InternalError, Prefix, Result}; +use super::fst_merger_builder::FstMergerBuilder; + pub struct WordFstBuilder<'a> { - stream: Option>, - word_fst_builder: SetBuilder>, - last_word: Option>, + word_fst_builder: FstMergerBuilder<'a>, prefix_fst_builder: Option, - inserted_words: usize, registered_words: usize, } impl<'a> WordFstBuilder<'a> { pub fn new(words_fst: &'a Set>) -> Result { Ok(Self { - stream: Some(words_fst.stream()), - word_fst_builder: SetBuilder::new(BufWriter::new(tempfile()?))?, + word_fst_builder: FstMergerBuilder::new(Some(words_fst))?, prefix_fst_builder: None, - last_word: None, - inserted_words: 0, registered_words: 0, }) } @@ -38,100 +34,13 @@ impl<'a> WordFstBuilder<'a> { self.registered_words += 1; } - if let Some(left) = self.last_word.take() { - let (left_inserted, right_inserted) = - self.compare_and_insert(deladd, left.as_slice(), right)?; - - // left was not inserted, so we keep it for the next iteration - if !left_inserted { - self.last_word = Some(left); + self.word_fst_builder.register(deladd, right, &mut |bytes, deladd, is_modified| { + if let Some(prefix_fst_builder) = &mut self.prefix_fst_builder { + prefix_fst_builder.insert_word(bytes, deladd, is_modified) + } else { + Ok(()) } - - // right was inserted, so we can stop - if right_inserted { - return Ok(()); - } - } - - if let Some(mut stream) = self.stream.take() { - while let Some(left) = stream.next() { - let (left_inserted, right_inserted) = - self.compare_and_insert(deladd, left, right)?; - - // left was not inserted, so we keep it for the next iteration - if !left_inserted { - self.last_word = Some(left.to_vec()); - } - - // right was inserted, so we can stop - if right_inserted { - self.stream = Some(stream); - return Ok(()); - } - } - - // If we reach this point, it means that the stream is empty - // and we need to insert the incoming word - 
self.insert_word(right, deladd, true)?; - - self.stream = Some(stream); - } - - Ok(()) - } - - pub fn compare_and_insert( - &mut self, - deladd: DelAdd, - left: &[u8], - right: &[u8], - ) -> Result<(bool, bool)> { - let mut left_inserted = false; - let mut right_inserted = false; - match left.cmp(right) { - std::cmp::Ordering::Less => { - // We need to insert the last word from the current fst - self.insert_word(left, DelAdd::Addition, false)?; - - left_inserted = true; - } - std::cmp::Ordering::Equal => { - self.insert_word(right, deladd, true)?; - - left_inserted = true; - right_inserted = true; - } - std::cmp::Ordering::Greater => { - self.insert_word(right, deladd, true)?; - - right_inserted = true; - } - } - - Ok((left_inserted, right_inserted)) - } - - fn insert_word(&mut self, bytes: &[u8], deladd: DelAdd, is_modified: bool) -> Result<()> { - // Addition: We insert the word - // Deletion: We delete the word by not inserting it - if deladd == DelAdd::Addition { - self.inserted_words += 1; - self.word_fst_builder.insert(bytes)?; - } - - if let Some(prefix_fst_builder) = self.prefix_fst_builder.as_mut() { - prefix_fst_builder.insert_word(bytes, deladd, is_modified)?; - } - - Ok(()) - } - - fn drain_stream(&mut self) -> Result<()> { - if let Some(mut stream) = self.stream.take() { - while let Some(current) = stream.next() { - self.insert_word(current, DelAdd::Addition, false)?; - } - } + })?; Ok(()) } @@ -141,13 +50,13 @@ impl<'a> WordFstBuilder<'a> { index: &crate::Index, rtxn: &heed::RoTxn, ) -> Result<(Mmap, Option)> { - self.drain_stream()?; - - let words_fst_file = - self.word_fst_builder.into_inner()?.into_inner().map_err(|_| { - InternalError::IndexingMergingKeys { process: "building-words-fst" } - })?; - let words_fst_mmap = unsafe { Mmap::map(&words_fst_file)? }; + let words_fst_mmap = self.word_fst_builder.build(&mut |bytes, deladd, is_modified| { + if let Some(prefix_fst_builder) = &mut self.prefix_fst_builder { + prefix_fst_builder.insert_word(bytes, deladd, is_modified) + } else { + Ok(()) + } + })?; let prefix_data = self .prefix_fst_builder diff --git a/milli/src/update/new/words_prefix_docids.rs b/crates/milli/src/update/new/words_prefix_docids.rs similarity index 60% rename from milli/src/update/new/words_prefix_docids.rs rename to crates/milli/src/update/new/words_prefix_docids.rs index 38c2b1744..edc09c5f3 100644 --- a/milli/src/update/new/words_prefix_docids.rs +++ b/crates/milli/src/update/new/words_prefix_docids.rs @@ -1,10 +1,16 @@ +use std::cell::RefCell; use std::collections::HashSet; +use std::io::{BufReader, BufWriter, Read, Seek, Write}; use hashbrown::HashMap; use heed::types::Bytes; -use heed::{BytesDecode, Database, RwTxn}; -use roaring::RoaringBitmap; +use heed::{BytesDecode, Database, RoTxn, RwTxn}; +use rayon::iter::{IntoParallelIterator, ParallelIterator as _}; +use roaring::MultiOps; +use tempfile::tempfile; +use thread_local::ThreadLocal; +use super::indexer::document_changes::RefCellExt; use crate::heed_codec::StrBEU16Codec; use crate::{CboRoaringBitmapCodec, Index, Prefix, Result}; @@ -38,22 +44,103 @@ impl WordPrefixDocids { prefixes: &HashSet, ) -> Result<()> { // We fetch the docids associated to the newly added word prefix fst only. - let mut docids = RoaringBitmap::new(); - for prefix in prefixes { - docids.clear(); - let prefix = prefix.as_bytes(); - for result in self.database.prefix_iter(wtxn, prefix)? { - let (_word, data) = result?; - docids |= &data; - } + // And collect the CboRoaringBitmaps pointers in an HashMap. 
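The new `execute` body that follows collects the serialized bitmap pointers single-threaded, then computes each prefix's union in parallel. A compact, runnable model of that union step using `roaring::MultiOps` and rayon as the real code does, with the plain roaring serialization format standing in for milli's `CboRoaringBitmapCodec`:

```rust
use rayon::prelude::*;
use roaring::{MultiOps, RoaringBitmap};

fn main() -> std::io::Result<()> {
    // One serialized bitmap per word sharing the prefix.
    let word_bitmaps: Vec<Vec<u8>> = (0u32..3)
        .map(|i| {
            let bitmap: RoaringBitmap = (i * 10..i * 10 + 5).collect();
            let mut bytes = Vec::new();
            bitmap.serialize_into(&mut bytes)?;
            Ok(bytes)
        })
        .collect::<std::io::Result<_>>()?;

    let prefixes: Vec<(&str, &[Vec<u8>])> = vec![("he", &word_bitmaps)];

    // Deserialize and union all the bitmaps of each prefix in parallel.
    let unions: Vec<(String, RoaringBitmap)> = prefixes
        .par_iter()
        .map(|(prefix, bitmaps)| {
            let union = bitmaps
                .iter()
                .map(|bytes| RoaringBitmap::deserialize_from(&bytes[..]))
                .union()?;
            Ok((prefix.to_string(), union))
        })
        .collect::<std::io::Result<_>>()?;

    // Three disjoint 5-element bitmaps union into 15 docids.
    assert_eq!(unions[0].1.len(), 15);
    Ok(())
}
```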
+        let frozen = FrozenPrefixBitmaps::from_prefixes(self.database, wtxn, prefixes)?;
 
-            self.prefix_database.put(wtxn, prefix, &docids)?;
+        // We access this HashMap in parallel to compute the *union* of all
+        // of them and *serialize* them into files. There is one file per CPU.
+        let local_entries = ThreadLocal::with_capacity(rayon::current_num_threads());
+        prefixes.into_par_iter().map(AsRef::as_ref).try_for_each(|prefix| {
+            let refcell = local_entries.get_or_try(|| {
+                tempfile().map(BufWriter::new).map(|f| RefCell::new((Vec::new(), f, Vec::new())))
+            })?;
+
+            let mut refmut = refcell.borrow_mut_or_yield();
+            let (ref mut index, ref mut file, ref mut buffer) = *refmut;
+
+            let output = frozen
+                .bitmaps(prefix)
+                .unwrap()
+                .iter()
+                .map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
+                .union()?;
+
+            buffer.clear();
+            CboRoaringBitmapCodec::serialize_into(&output, buffer);
+            index.push(PrefixEntry { prefix, serialized_length: buffer.len() });
+            file.write_all(buffer)
+        })?;
+
+        drop(frozen);
+
+        // We iterate over all the collected and serialized bitmaps through
+        // the files and entries to eventually put them in the final database.
+        for refcell in local_entries {
+            let (index, file, mut buffer) = refcell.into_inner();
+            let mut file = file.into_inner().map_err(|e| e.into_error())?;
+            file.rewind()?;
+            let mut file = BufReader::new(file);
+            for PrefixEntry { prefix, serialized_length } in index {
+                buffer.resize(serialized_length, 0);
+                file.read_exact(&mut buffer)?;
+                self.prefix_database.remap_data_type::<Bytes>().put(
+                    wtxn,
+                    prefix.as_bytes(),
+                    &buffer,
+                )?;
+            }
         }
 
         Ok(())
     }
 }
 
+/// Represents a prefix and the length the bitmap takes on disk.
+struct PrefixEntry<'a> {
+    prefix: &'a str,
+    serialized_length: usize,
+}
+
+/// Stores prefixes along with all the pointers to the associated
+/// CboRoaringBitmaps.
+///
+/// They are collected synchronously and stored in a HashMap. The
+/// synchronous process does a small amount of work, just storing
+/// pointers. The map can then be accessed in parallel to get the
+/// associated bitmap pointers.
+struct FrozenPrefixBitmaps<'a, 'rtxn> {
+    prefixes_bitmaps: HashMap<&'a str, Vec<&'rtxn [u8]>>,
+}
+
+impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
+    #[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
+    pub fn from_prefixes(
+        database: Database<Bytes, CboRoaringBitmapCodec>,
+        rtxn: &'rtxn RoTxn,
+        prefixes: &'a HashSet<Prefix>,
+    ) -> heed::Result<Self> {
+        let database = database.remap_data_type::<Bytes>();
+
+        let mut prefixes_bitmaps = HashMap::new();
+        for prefix in prefixes {
+            let mut bitmap_bytes = Vec::new();
+            for result in database.prefix_iter(rtxn, prefix.as_bytes())?
{ + let (_word, bytes) = result?; + bitmap_bytes.push(bytes); + } + assert!(prefixes_bitmaps.insert(prefix.as_str(), bitmap_bytes).is_none()); + } + + Ok(Self { prefixes_bitmaps }) + } + + pub fn bitmaps(&self, key: &str) -> Option<&[&'rtxn [u8]]> { + self.prefixes_bitmaps.get(key).map(AsRef::as_ref) + } +} + +unsafe impl<'a, 'rtxn> Sync for FrozenPrefixBitmaps<'a, 'rtxn> {} + struct WordPrefixIntegerDocids { database: Database, prefix_database: Database, diff --git a/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs similarity index 100% rename from milli/src/update/settings.rs rename to crates/milli/src/update/settings.rs diff --git a/milli/src/update/update_step.rs b/crates/milli/src/update/update_step.rs similarity index 100% rename from milli/src/update/update_step.rs rename to crates/milli/src/update/update_step.rs diff --git a/milli/src/update/word_prefix_docids.rs b/crates/milli/src/update/word_prefix_docids.rs similarity index 100% rename from milli/src/update/word_prefix_docids.rs rename to crates/milli/src/update/word_prefix_docids.rs diff --git a/milli/src/update/words_prefix_integer_docids.rs b/crates/milli/src/update/words_prefix_integer_docids.rs similarity index 100% rename from milli/src/update/words_prefix_integer_docids.rs rename to crates/milli/src/update/words_prefix_integer_docids.rs diff --git a/milli/src/update/words_prefixes_fst.rs b/crates/milli/src/update/words_prefixes_fst.rs similarity index 100% rename from milli/src/update/words_prefixes_fst.rs rename to crates/milli/src/update/words_prefixes_fst.rs diff --git a/milli/src/vector/error.rs b/crates/milli/src/vector/error.rs similarity index 100% rename from milli/src/vector/error.rs rename to crates/milli/src/vector/error.rs diff --git a/milli/src/vector/hf.rs b/crates/milli/src/vector/hf.rs similarity index 100% rename from milli/src/vector/hf.rs rename to crates/milli/src/vector/hf.rs diff --git a/milli/src/vector/json_template.rs b/crates/milli/src/vector/json_template.rs similarity index 100% rename from milli/src/vector/json_template.rs rename to crates/milli/src/vector/json_template.rs diff --git a/milli/src/vector/manual.rs b/crates/milli/src/vector/manual.rs similarity index 100% rename from milli/src/vector/manual.rs rename to crates/milli/src/vector/manual.rs diff --git a/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs similarity index 61% rename from milli/src/vector/mod.rs rename to crates/milli/src/vector/mod.rs index a21e9e2ca..57da50580 100644 --- a/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -1,7 +1,7 @@ use std::collections::HashMap; use std::sync::Arc; -use arroy::distances::{Angular, BinaryQuantizedAngular}; +use arroy::distances::{BinaryQuantizedCosine, Cosine}; use arroy::ItemId; use deserr::{DeserializeError, Deserr}; use heed::{RoTxn, RwTxn, Unspecified}; @@ -32,105 +32,243 @@ pub const REQUEST_PARALLELISM: usize = 40; pub struct ArroyWrapper { quantized: bool, - index: u16, + embedder_index: u8, database: arroy::Database, } impl ArroyWrapper { - pub fn new(database: arroy::Database, index: u16, quantized: bool) -> Self { - Self { database, index, quantized } + pub fn new( + database: arroy::Database, + embedder_index: u8, + quantized: bool, + ) -> Self { + Self { database, embedder_index, quantized } } - pub fn index(&self) -> u16 { - self.index + pub fn embedder_index(&self) -> u8 { + self.embedder_index + } + + fn readers<'a, D: arroy::Distance>( + &'a self, + rtxn: &'a RoTxn<'a>, + db: arroy::Database, + ) -> impl Iterator, arroy::Error>> 
+ 'a { + arroy_db_range_for_embedder(self.embedder_index).map_while(move |index| { + match arroy::Reader::open(rtxn, index, db) { + Ok(reader) => match reader.is_empty(rtxn) { + Ok(false) => Some(Ok(reader)), + Ok(true) => None, + Err(e) => Some(Err(e)), + }, + Err(arroy::Error::MissingMetadata(_)) => None, + Err(e) => Some(Err(e)), + } + }) } pub fn dimensions(&self, rtxn: &RoTxn) -> Result { + let first_id = arroy_db_range_for_embedder(self.embedder_index).next().unwrap(); if self.quantized { - Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions()) + Ok(arroy::Reader::open(rtxn, first_id, self.quantized_db())?.dimensions()) } else { - Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions()) + Ok(arroy::Reader::open(rtxn, first_id, self.angular_db())?.dimensions()) } } - pub fn quantize( + pub fn build_and_quantize( &mut self, wtxn: &mut RwTxn, - index: u16, + rng: &mut R, dimension: usize, + quantizing: bool, + cancel: &(impl Fn() -> bool + Sync + Send), ) -> Result<(), arroy::Error> { - if !self.quantized { - let writer = arroy::Writer::new(self.angular_db(), index, dimension); - writer.prepare_changing_distance::(wtxn)?; - self.quantized = true; + for index in arroy_db_range_for_embedder(self.embedder_index) { + if self.quantized { + let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + if writer.need_build(wtxn)? { + writer.builder(rng).build(wtxn)? + } else if writer.is_empty(wtxn)? { + break; + } + } else { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + // If we are quantizing the databases, we can't know from meilisearch + // if the db was empty but still contained the wrong metadata, thus we need + // to quantize everything and can't stop early. Since this operation can + // only happens once in the life of an embedder, it's not very performances + // sensitive. + if quantizing && !self.quantized { + let writer = writer.prepare_changing_distance::(wtxn)?; + writer.builder(rng).cancel(cancel).build(wtxn)?; + } else if writer.need_build(wtxn)? { + writer.builder(rng).cancel(cancel).build(wtxn)?; + } else if writer.is_empty(wtxn)? { + break; + } + } } Ok(()) } - pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn) - } - } - - pub fn build( + /// Overwrite all the embeddings associated with the index and item ID. + /// /!\ It won't remove embeddings after the last passed embedding, which can leave stale embeddings. + /// You should call `del_items` on the `item_id` before calling this method. + /// /!\ Cannot insert more than u8::MAX embeddings; after inserting u8::MAX embeddings, all the remaining ones will be silently ignored. + pub fn add_items( &self, wtxn: &mut RwTxn, - rng: &mut R, - dimension: usize, + item_id: arroy::ItemId, + embeddings: &Embeddings, ) -> Result<(), arroy::Error> { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None) + let dimension = embeddings.dimension(); + for (index, vector) in + arroy_db_range_for_embedder(self.embedder_index).zip(embeddings.iter()) + { + if self.quantized { + arroy::Writer::new(self.quantized_db(), index, dimension) + .add_item(wtxn, item_id, vector)? 
+            } else {
+                arroy::Writer::new(self.angular_db(), index, dimension)
+                    .add_item(wtxn, item_id, vector)?
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Add one document in this index where we can find an empty spot.
     pub fn add_item(
         &self,
         wtxn: &mut RwTxn,
-        dimension: usize,
         item_id: arroy::ItemId,
         vector: &[f32],
     ) -> Result<(), arroy::Error> {
         if self.quantized {
-            arroy::Writer::new(self.quantized_db(), self.index, dimension)
-                .add_item(wtxn, item_id, vector)
+            self._add_item(wtxn, self.quantized_db(), item_id, vector)
         } else {
-            arroy::Writer::new(self.angular_db(), self.index, dimension)
-                .add_item(wtxn, item_id, vector)
+            self._add_item(wtxn, self.angular_db(), item_id, vector)
         }
     }
 
+    fn _add_item<D: arroy::Distance>(
+        &self,
+        wtxn: &mut RwTxn,
+        db: arroy::Database<D>,
+        item_id: arroy::ItemId,
+        vector: &[f32],
+    ) -> Result<(), arroy::Error> {
+        let dimension = vector.len();
+
+        for index in arroy_db_range_for_embedder(self.embedder_index) {
+            let writer = arroy::Writer::new(db, index, dimension);
+            if !writer.contains_item(wtxn, item_id)? {
+                writer.add_item(wtxn, item_id, vector)?;
+                break;
+            }
+        }
+        Ok(())
+    }
+
+    /// Delete all embeddings from a specific `item_id`
+    pub fn del_items(
+        &self,
+        wtxn: &mut RwTxn,
+        dimension: usize,
+        item_id: arroy::ItemId,
+    ) -> Result<(), arroy::Error> {
+        for index in arroy_db_range_for_embedder(self.embedder_index) {
+            if self.quantized {
+                let writer = arroy::Writer::new(self.quantized_db(), index, dimension);
+                if !writer.del_item(wtxn, item_id)? {
+                    break;
+                }
+            } else {
+                let writer = arroy::Writer::new(self.angular_db(), index, dimension);
+                if !writer.del_item(wtxn, item_id)? {
+                    break;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Delete one item.
+    pub fn del_item(
+        &self,
+        wtxn: &mut RwTxn,
+        item_id: arroy::ItemId,
+        vector: &[f32],
     ) -> Result<bool, arroy::Error> {
         if self.quantized {
-            arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id)
+            self._del_item(wtxn, self.quantized_db(), item_id, vector)
         } else {
-            arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id)
+            self._del_item(wtxn, self.angular_db(), item_id, vector)
         }
     }
 
+    fn _del_item<D: arroy::Distance>(
+        &self,
+        wtxn: &mut RwTxn,
+        db: arroy::Database<D>,
+        item_id: arroy::ItemId,
+        vector: &[f32],
+    ) -> Result<bool, arroy::Error> {
+        let dimension = vector.len();
+        let mut deleted_index = None;
+
+        for index in arroy_db_range_for_embedder(self.embedder_index) {
+            let writer = arroy::Writer::new(db, index, dimension);
+            let Some(candidate) = writer.item_vector(wtxn, item_id)?
+ /// Delete one embedding of the given `item_id`, matching the provided vector. + pub fn del_item( + &self, + wtxn: &mut RwTxn, + item_id: arroy::ItemId, + vector: &[f32], ) -> Result<bool, arroy::Error> { if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id) + self._del_item(wtxn, self.quantized_db(), item_id, vector) } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id) + self._del_item(wtxn, self.angular_db(), item_id, vector) } } + fn _del_item<D: arroy::Distance>( + &self, + wtxn: &mut RwTxn, + db: arroy::Database<D>, + item_id: arroy::ItemId, + vector: &[f32], + ) -> Result<bool, arroy::Error> { + let dimension = vector.len(); + let mut deleted_index = None; + + for index in arroy_db_range_for_embedder(self.embedder_index) { + let writer = arroy::Writer::new(db, index, dimension); + let Some(candidate) = writer.item_vector(wtxn, item_id)? else { + // uses invariant: vectors are packed in the first writers. + break; + }; + if candidate == vector { + writer.del_item(wtxn, item_id)?; + deleted_index = Some(index); + } + } + + // 🥲 enforce invariant: vectors are packed in the first writers. + if let Some(deleted_index) = deleted_index { + let mut last_index_with_a_vector = None; + for index in + arroy_db_range_for_embedder(self.embedder_index).skip(deleted_index as usize) + { + let writer = arroy::Writer::new(db, index, dimension); + let Some(candidate) = writer.item_vector(wtxn, item_id)? + else { + break; + }; + last_index_with_a_vector = Some((index, candidate)); + } + if let Some((last_index, vector)) = last_index_with_a_vector { + let writer = arroy::Writer::new(db, last_index, dimension); + writer.del_item(wtxn, item_id)?; + let writer = arroy::Writer::new(db, deleted_index, dimension); + writer.add_item(wtxn, item_id, &vector)?; + } + } + Ok(deleted_index.is_some()) + } + pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn) - } - } - - pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn) + for index in arroy_db_range_for_embedder(self.embedder_index) { + if self.quantized { + let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + if writer.is_empty(wtxn)? { + break; + } + writer.clear(wtxn)?; + } else { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + if writer.is_empty(wtxn)? { + break; + } + writer.clear(wtxn)?; + } } + Ok(()) } pub fn contains_item( @@ -139,11 +277,25 @@ impl ArroyWrapper { dimension: usize, item: arroy::ItemId, ) -> Result<bool, arroy::Error> { - if self.quantized { - arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, item) - } else { - arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item) + for index in arroy_db_range_for_embedder(self.embedder_index) { + let contains = if self.quantized { + let writer = arroy::Writer::new(self.quantized_db(), index, dimension); + if writer.is_empty(rtxn)? { + break; + } + writer.contains_item(rtxn, item)? + } else { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + if writer.is_empty(rtxn)? { + break; + } + writer.contains_item(rtxn, item)? + }; + if contains { + return Ok(contains); + } } + Ok(false) } pub fn nns_by_item( @@ -152,45 +304,108 @@ impl ArroyWrapper { item: ItemId, limit: usize, filter: Option<&RoaringBitmap>, - ) -> Result<Option<Vec<(ItemId, f32)>>, arroy::Error> { + ) -> Result<Vec<(ItemId, f32)>, arroy::Error> { if self.quantized { - arroy::Reader::open(rtxn, self.index, self.quantized_db())? - .nns_by_item(rtxn, item, limit, None, None, filter) + self._nns_by_item(rtxn, self.quantized_db(), item, limit, filter) } else { - arroy::Reader::open(rtxn, self.index, self.angular_db())? - .nns_by_item(rtxn, item, limit, None, None, filter) + self._nns_by_item(rtxn, self.angular_db(), item, limit, filter) } } + fn _nns_by_item<D: arroy::Distance>( + &self, + rtxn: &RoTxn, + db: arroy::Database<D>, + item: ItemId, + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result<Vec<(ItemId, f32)>, arroy::Error> { + let mut results = Vec::new(); + + for reader in self.readers(rtxn, db) { + let reader = reader?; + let mut searcher = reader.nns(limit); + if let Some(filter) = filter { + searcher.candidates(filter); + } + + if let Some(mut ret) = searcher.by_item(rtxn, item)? { + results.append(&mut ret); + } else { + break; + } + } + results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); + Ok(results) + } +
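Each reader's hits come back sorted by distance from arroy, but the concatenation across indexes is not, hence the final sort in `_nns_by_item` (and in `_nns_by_vector` below). Since `f32` is only `PartialOrd` (because of NaN), the distance has to be wrapped before it can serve as a sort key; a self-contained illustration of the `OrderedFloat` pattern used here, with the `ordered-float` crate:

```rust
use ordered_float::OrderedFloat;

/// Sketch: re-sort nearest-neighbour hits gathered from several arroy
/// indexes globally by distance, smallest first.
fn merge_hits(mut hits: Vec<(u32, f32)>) -> Vec<(u32, f32)> {
    // f32 does not implement Ord, so it cannot be a key for
    // sort_unstable_by_key directly; OrderedFloat provides the total order.
    hits.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));
    hits
}
```

As written, the merged list can hold up to `limit` hits per underlying reader; any final truncation is left to the callers.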
pub fn nns_by_vector( &self, - txn: &RoTxn, - item: &[f32], + rtxn: &RoTxn, + vector: &[f32], limit: usize, filter: Option<&RoaringBitmap>, ) -> Result<Vec<(ItemId, f32)>, arroy::Error> { if self.quantized { - arroy::Reader::open(txn, self.index, self.quantized_db())? - .nns_by_vector(txn, item, limit, None, None, filter) + self._nns_by_vector(rtxn, self.quantized_db(), vector, limit, filter) } else { - arroy::Reader::open(txn, self.index, self.angular_db())? - .nns_by_vector(txn, item, limit, None, None, filter) + self._nns_by_vector(rtxn, self.angular_db(), vector, limit, filter) } } - pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result<Option<Vec<f32>>, arroy::Error> { + fn _nns_by_vector<D: arroy::Distance>( + &self, + rtxn: &RoTxn, + db: arroy::Database<D>, + vector: &[f32], + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result<Vec<(ItemId, f32)>, arroy::Error> { + let mut results = Vec::new(); + + for reader in self.readers(rtxn, db) { + let reader = reader?; + let mut searcher = reader.nns(limit); + if let Some(filter) = filter { + searcher.candidates(filter); + } + + results.append(&mut searcher.by_vector(rtxn, vector)?); + } + + results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); + + Ok(results) + } + + pub fn item_vectors(&self, rtxn: &RoTxn, item_id: u32) -> Result<Vec<Vec<f32>>, arroy::Error> { + let mut vectors = Vec::new(); + if self.quantized { - arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid) + for reader in self.readers(rtxn, self.quantized_db()) { + if let Some(vec) = reader?.item_vector(rtxn, item_id)? { + vectors.push(vec); + } else { + break; + } + } } else { - arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid) + for reader in self.readers(rtxn, self.angular_db()) { + if let Some(vec) = reader?.item_vector(rtxn, item_id)? { + vectors.push(vec); + } else { + break; + } + } } + Ok(vectors) } - fn angular_db(&self) -> arroy::Database<Angular> { + fn angular_db(&self) -> arroy::Database<Cosine> { self.database.remap_data_type() } - fn quantized_db(&self) -> arroy::Database<BinaryQuantizedAngular> { + fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> { self.database.remap_data_type() } } diff --git a/milli/src/vector/ollama.rs b/crates/milli/src/vector/ollama.rs similarity index 100% rename from milli/src/vector/ollama.rs rename to crates/milli/src/vector/ollama.rs diff --git a/milli/src/vector/openai.rs b/crates/milli/src/vector/openai.rs similarity index 100% rename from milli/src/vector/openai.rs rename to crates/milli/src/vector/openai.rs diff --git a/milli/src/vector/parsed_vectors.rs b/crates/milli/src/vector/parsed_vectors.rs similarity index 100% rename from milli/src/vector/parsed_vectors.rs rename to crates/milli/src/vector/parsed_vectors.rs diff --git a/milli/src/vector/rest.rs b/crates/milli/src/vector/rest.rs similarity index 100% rename from milli/src/vector/rest.rs rename to crates/milli/src/vector/rest.rs diff --git a/milli/src/vector/settings.rs b/crates/milli/src/vector/settings.rs similarity index 97% rename from milli/src/vector/settings.rs rename to crates/milli/src/vector/settings.rs index 3bb7f09e6..d1cf364a2 100644 --- a/milli/src/vector/settings.rs +++ b/crates/milli/src/vector/settings.rs @@ -417,6 +417,8 @@ impl EmbeddingSettings { pub const DISTRIBUTION: &'static str = "distribution"; + pub const BINARY_QUANTIZED: &'static str = "binaryQuantized"; + pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] { match field { Self::SOURCE => &[ @@ -456,6 +458,13 @@ EmbedderSource::Rest, EmbedderSource::UserProvided, ], + Self::BINARY_QUANTIZED => &[ + EmbedderSource::HuggingFace, + EmbedderSource::Ollama, + EmbedderSource::OpenAi, + EmbedderSource::Rest, + EmbedderSource::UserProvided, + ], _other => unreachable!("unknown field"), } } @@ -470,6 +479,7 @@ impl EmbeddingSettings { Self::DIMENSIONS, Self::DISTRIBUTION, Self::URL, + Self::BINARY_QUANTIZED, ], EmbedderSource::HuggingFace => &[ Self::SOURCE, @@ -477,6 +487,7 @@ impl EmbeddingSettings { Self::REVISION, Self::DOCUMENT_TEMPLATE, Self::DISTRIBUTION, + Self::BINARY_QUANTIZED, ], EmbedderSource::Ollama => &[ Self::SOURCE, @@ -486,8 +497,11 @@ impl EmbeddingSettings { Self::API_KEY, Self::DIMENSIONS, Self::DISTRIBUTION, + Self::BINARY_QUANTIZED, ], - EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION], + EmbedderSource::UserProvided => { + &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION, Self::BINARY_QUANTIZED] + } EmbedderSource::Rest => &[ Self::SOURCE, Self::API_KEY, @@ -498,6 +512,7 @@ impl EmbeddingSettings { Self::RESPONSE, Self::HEADERS, Self::DISTRIBUTION, + Self::BINARY_QUANTIZED, ], } }
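For context on the settings hunk above: `BINARY_QUANTIZED` is now an accepted field for every embedder source, so a settings payload can opt an embedder into binary quantization. A hypothetical payload shape follows; only the "binaryQuantized" key is established by this diff (it matches the `BINARY_QUANTIZED` constant), while the embedder name and surrounding values are illustrative:

```rust
use serde_json::json;

/// Sketch: an embedder settings entry carrying the new field. The other
/// keys mirror the SOURCE and DIMENSIONS constants it is listed alongside.
fn embedder_settings_example() -> serde_json::Value {
    json!({
        "default": {
            "source": "userProvided",
            "dimensions": 512,
            // New in this diff: opt this embedder into binary quantization.
            "binaryQuantized": true
        }
    })
}
```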
diff --git a/milli/tests/assets/test_set.ndjson b/crates/milli/tests/assets/test_set.ndjson similarity index 100% rename from milli/tests/assets/test_set.ndjson rename to crates/milli/tests/assets/test_set.ndjson diff --git a/milli/tests/mod.rs b/crates/milli/tests/mod.rs similarity index 100% rename from milli/tests/mod.rs rename to crates/milli/tests/mod.rs diff --git a/milli/tests/search/distinct.rs b/crates/milli/tests/search/distinct.rs similarity index 100% rename from milli/tests/search/distinct.rs rename to crates/milli/tests/search/distinct.rs diff --git a/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs similarity index 100% rename from milli/tests/search/facet_distribution.rs rename to crates/milli/tests/search/facet_distribution.rs diff --git a/milli/tests/search/filters.rs b/crates/milli/tests/search/filters.rs similarity index 100% rename from milli/tests/search/filters.rs rename to crates/milli/tests/search/filters.rs diff --git a/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs similarity index 100% rename from milli/tests/search/mod.rs rename to crates/milli/tests/search/mod.rs diff --git a/milli/tests/search/phrase_search.rs b/crates/milli/tests/search/phrase_search.rs similarity index 100% rename from milli/tests/search/phrase_search.rs rename to crates/milli/tests/search/phrase_search.rs diff --git a/milli/tests/search/query_criteria.rs b/crates/milli/tests/search/query_criteria.rs similarity index 100% rename from milli/tests/search/query_criteria.rs rename to crates/milli/tests/search/query_criteria.rs diff --git a/milli/tests/search/sort.rs b/crates/milli/tests/search/sort.rs similarity index 100% rename from milli/tests/search/sort.rs rename to crates/milli/tests/search/sort.rs diff --git a/milli/tests/search/typo_tolerance.rs b/crates/milli/tests/search/typo_tolerance.rs similarity index 100% rename from milli/tests/search/typo_tolerance.rs rename to crates/milli/tests/search/typo_tolerance.rs diff --git a/permissive-json-pointer/Cargo.toml b/crates/permissive-json-pointer/Cargo.toml similarity index 100% rename from permissive-json-pointer/Cargo.toml rename to crates/permissive-json-pointer/Cargo.toml diff --git a/permissive-json-pointer/README.md b/crates/permissive-json-pointer/README.md similarity index 100% rename from permissive-json-pointer/README.md rename to crates/permissive-json-pointer/README.md diff --git a/permissive-json-pointer/src/lib.rs b/crates/permissive-json-pointer/src/lib.rs similarity index 100% rename from permissive-json-pointer/src/lib.rs rename to crates/permissive-json-pointer/src/lib.rs diff --git a/tracing-trace/Cargo.toml b/crates/tracing-trace/Cargo.toml similarity
index 100% rename from tracing-trace/Cargo.toml rename to crates/tracing-trace/Cargo.toml diff --git a/tracing-trace/src/bin/trace-to-callstats.rs b/crates/tracing-trace/src/bin/trace-to-callstats.rs similarity index 100% rename from tracing-trace/src/bin/trace-to-callstats.rs rename to crates/tracing-trace/src/bin/trace-to-callstats.rs diff --git a/tracing-trace/src/bin/trace-to-firefox.rs b/crates/tracing-trace/src/bin/trace-to-firefox.rs similarity index 100% rename from tracing-trace/src/bin/trace-to-firefox.rs rename to crates/tracing-trace/src/bin/trace-to-firefox.rs diff --git a/tracing-trace/src/entry.rs b/crates/tracing-trace/src/entry.rs similarity index 100% rename from tracing-trace/src/entry.rs rename to crates/tracing-trace/src/entry.rs diff --git a/tracing-trace/src/error.rs b/crates/tracing-trace/src/error.rs similarity index 100% rename from tracing-trace/src/error.rs rename to crates/tracing-trace/src/error.rs diff --git a/tracing-trace/src/layer.rs b/crates/tracing-trace/src/layer.rs similarity index 100% rename from tracing-trace/src/layer.rs rename to crates/tracing-trace/src/layer.rs diff --git a/tracing-trace/src/lib.rs b/crates/tracing-trace/src/lib.rs similarity index 100% rename from tracing-trace/src/lib.rs rename to crates/tracing-trace/src/lib.rs diff --git a/tracing-trace/src/main.rs b/crates/tracing-trace/src/main.rs similarity index 100% rename from tracing-trace/src/main.rs rename to crates/tracing-trace/src/main.rs diff --git a/tracing-trace/src/processor/firefox_profiler.rs b/crates/tracing-trace/src/processor/firefox_profiler.rs similarity index 100% rename from tracing-trace/src/processor/firefox_profiler.rs rename to crates/tracing-trace/src/processor/firefox_profiler.rs diff --git a/tracing-trace/src/processor/fmt.rs b/crates/tracing-trace/src/processor/fmt.rs similarity index 100% rename from tracing-trace/src/processor/fmt.rs rename to crates/tracing-trace/src/processor/fmt.rs diff --git a/tracing-trace/src/processor/mod.rs b/crates/tracing-trace/src/processor/mod.rs similarity index 100% rename from tracing-trace/src/processor/mod.rs rename to crates/tracing-trace/src/processor/mod.rs diff --git a/tracing-trace/src/processor/span_stats.rs b/crates/tracing-trace/src/processor/span_stats.rs similarity index 100% rename from tracing-trace/src/processor/span_stats.rs rename to crates/tracing-trace/src/processor/span_stats.rs diff --git a/xtask/Cargo.toml b/crates/xtask/Cargo.toml similarity index 100% rename from xtask/Cargo.toml rename to crates/xtask/Cargo.toml diff --git a/xtask/src/bench/assets.rs b/crates/xtask/src/bench/assets.rs similarity index 100% rename from xtask/src/bench/assets.rs rename to crates/xtask/src/bench/assets.rs diff --git a/xtask/src/bench/client.rs b/crates/xtask/src/bench/client.rs similarity index 100% rename from xtask/src/bench/client.rs rename to crates/xtask/src/bench/client.rs diff --git a/xtask/src/bench/command.rs b/crates/xtask/src/bench/command.rs similarity index 100% rename from xtask/src/bench/command.rs rename to crates/xtask/src/bench/command.rs diff --git a/xtask/src/bench/dashboard.rs b/crates/xtask/src/bench/dashboard.rs similarity index 100% rename from xtask/src/bench/dashboard.rs rename to crates/xtask/src/bench/dashboard.rs diff --git a/xtask/src/bench/env_info.rs b/crates/xtask/src/bench/env_info.rs similarity index 100% rename from xtask/src/bench/env_info.rs rename to crates/xtask/src/bench/env_info.rs diff --git a/xtask/src/bench/meili_process.rs b/crates/xtask/src/bench/meili_process.rs similarity 
index 100% rename from xtask/src/bench/meili_process.rs rename to crates/xtask/src/bench/meili_process.rs diff --git a/xtask/src/bench/mod.rs b/crates/xtask/src/bench/mod.rs similarity index 100% rename from xtask/src/bench/mod.rs rename to crates/xtask/src/bench/mod.rs diff --git a/xtask/src/bench/workload.rs b/crates/xtask/src/bench/workload.rs similarity index 100% rename from xtask/src/bench/workload.rs rename to crates/xtask/src/bench/workload.rs diff --git a/xtask/src/lib.rs b/crates/xtask/src/lib.rs similarity index 100% rename from xtask/src/lib.rs rename to crates/xtask/src/lib.rs diff --git a/xtask/src/main.rs b/crates/xtask/src/main.rs similarity index 100% rename from xtask/src/main.rs rename to crates/xtask/src/main.rs diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs deleted file mode 100644 index 3c7ca0ed3..000000000 --- a/meilisearch/src/analytics/mod.rs +++ /dev/null @@ -1,137 +0,0 @@ -mod mock_analytics; -#[cfg(feature = "analytics")] -mod segment_analytics; - -use std::fs; -use std::path::{Path, PathBuf}; -use std::str::FromStr; - -use actix_web::HttpRequest; -use meilisearch_types::InstanceUid; -pub use mock_analytics::MockAnalytics; -use once_cell::sync::Lazy; -use platform_dirs::AppDirs; -use serde_json::Value; - -use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; - -// if the analytics feature is disabled -// the `SegmentAnalytics` point to the mock instead of the real analytics -#[cfg(not(feature = "analytics"))] -pub type SegmentAnalytics = mock_analytics::MockAnalytics; -#[cfg(not(feature = "analytics"))] -pub type SearchAggregator = mock_analytics::SearchAggregator; -#[cfg(not(feature = "analytics"))] -pub type SimilarAggregator = mock_analytics::SimilarAggregator; -#[cfg(not(feature = "analytics"))] -pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator; -#[cfg(not(feature = "analytics"))] -pub type FacetSearchAggregator = mock_analytics::FacetSearchAggregator; - -// if the feature analytics is enabled we use the real analytics -#[cfg(feature = "analytics")] -pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; -#[cfg(feature = "analytics")] -pub type SearchAggregator = segment_analytics::SearchAggregator; -#[cfg(feature = "analytics")] -pub type SimilarAggregator = segment_analytics::SimilarAggregator; -#[cfg(feature = "analytics")] -pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator; -#[cfg(feature = "analytics")] -pub type FacetSearchAggregator = segment_analytics::FacetSearchAggregator; - -/// The Meilisearch config dir: -/// `~/.config/Meilisearch` on *NIX or *BSD. -/// `~/Library/ApplicationSupport` on macOS. -/// `%APPDATA` (= `C:\Users%USERNAME%\AppData\Roaming`) on windows. 
-static MEILISEARCH_CONFIG_PATH: Lazy> = - Lazy::new(|| AppDirs::new(Some("Meilisearch"), false).map(|appdir| appdir.config_dir)); - -fn config_user_id_path(db_path: &Path) -> Option { - db_path - .canonicalize() - .ok() - .map(|path| path.join("instance-uid").display().to_string().replace('/', "-")) - .zip(MEILISEARCH_CONFIG_PATH.as_ref()) - .map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-'))) -} - -/// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid` -fn find_user_id(db_path: &Path) -> Option { - fs::read_to_string(db_path.join("instance-uid")) - .ok() - .or_else(|| fs::read_to_string(config_user_id_path(db_path)?).ok()) - .and_then(|uid| InstanceUid::from_str(&uid).ok()) -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum DocumentDeletionKind { - PerDocumentId, - ClearAll, - PerBatch, - PerFilter, -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum DocumentFetchKind { - PerDocumentId { retrieve_vectors: bool }, - Normal { with_filter: bool, limit: usize, offset: usize, retrieve_vectors: bool }, -} - -pub trait Analytics: Sync + Send { - fn instance_uid(&self) -> Option<&InstanceUid>; - - /// The method used to publish most analytics that do not need to be batched every hours - fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>); - - /// This method should be called to aggregate a get search - fn get_search(&self, aggregate: SearchAggregator); - - /// This method should be called to aggregate a post search - fn post_search(&self, aggregate: SearchAggregator); - - /// This method should be called to aggregate a get similar request - fn get_similar(&self, aggregate: SimilarAggregator); - - /// This method should be called to aggregate a post similar request - fn post_similar(&self, aggregate: SimilarAggregator); - - /// This method should be called to aggregate a post array of searches - fn post_multi_search(&self, aggregate: MultiSearchAggregator); - - /// This method should be called to aggregate post facet values searches - fn post_facet_search(&self, aggregate: FacetSearchAggregator); - - // this method should be called to aggregate an add documents request - fn add_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ); - - // this method should be called to aggregate a fetch documents request - fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); - - // this method should be called to aggregate a fetch documents request - fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); - - // this method should be called to aggregate a add documents request - fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest); - - // this method should be called to batch an update documents request - fn update_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ); - - // this method should be called to batch an update documents by function request - fn update_documents_by_function( - &self, - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ); -} diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs deleted file mode 100644 index f8d6a0fdc..000000000 --- a/meilisearch/src/analytics/segment_analytics.rs +++ /dev/null @@ -1,1994 +0,0 @@ -use std::collections::{BTreeSet, 
BinaryHeap, HashMap, HashSet}; -use std::fs; -use std::mem::take; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::{Duration, Instant}; - -use actix_web::http::header::{CONTENT_TYPE, USER_AGENT}; -use actix_web::HttpRequest; -use byte_unit::Byte; -use index_scheduler::IndexScheduler; -use meilisearch_auth::{AuthController, AuthFilter}; -use meilisearch_types::locales::Locale; -use meilisearch_types::InstanceUid; -use once_cell::sync::Lazy; -use regex::Regex; -use segment::message::{Identify, Track, User}; -use segment::{AutoBatcher, Batcher, HttpClient}; -use serde::Serialize; -use serde_json::{json, Value}; -use sysinfo::{Disks, System}; -use time::OffsetDateTime; -use tokio::select; -use tokio::sync::mpsc::{self, Receiver, Sender}; -use uuid::Uuid; - -use super::{ - config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, -}; -use crate::analytics::Analytics; -use crate::option::{ - default_http_addr, IndexerOpts, LogMode, MaxMemory, MaxThreads, ScheduleSnapshot, -}; -use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumentsQuery}; -use crate::routes::indexes::facet_search::FacetSearchQuery; -use crate::routes::{create_all_stats, Stats}; -use crate::search::{ - FacetSearchResult, FederatedSearch, MatchingStrategy, SearchQuery, SearchQueryWithIndex, - SearchResult, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, - DEFAULT_SEMANTIC_RATIO, -}; -use crate::Opt; - -const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; - -/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. -fn write_user_id(db_path: &Path, user_id: &InstanceUid) { - let _ = fs::write(db_path.join("instance-uid"), user_id.to_string()); - if let Some((meilisearch_config_path, user_id_path)) = - MEILISEARCH_CONFIG_PATH.as_ref().zip(config_user_id_path(db_path)) - { - let _ = fs::create_dir_all(meilisearch_config_path); - let _ = fs::write(user_id_path, user_id.to_string()); - } -} - -const SEGMENT_API_KEY: &str = "P3FWhhEsJiEDCuEHpmcN9DHcK4hVfBvb"; - -pub fn extract_user_agents(request: &HttpRequest) -> Vec { - request - .headers() - .get(ANALYTICS_HEADER) - .or_else(|| request.headers().get(USER_AGENT)) - .and_then(|header| header.to_str().ok()) - .unwrap_or("unknown") - .split(';') - .map(str::trim) - .map(ToString::to_string) - .collect() -} - -pub enum AnalyticsMsg { - BatchMessage(Track), - AggregateGetSearch(SearchAggregator), - AggregatePostSearch(SearchAggregator), - AggregateGetSimilar(SimilarAggregator), - AggregatePostSimilar(SimilarAggregator), - AggregatePostMultiSearch(MultiSearchAggregator), - AggregatePostFacetSearch(FacetSearchAggregator), - AggregateAddDocuments(DocumentsAggregator), - AggregateDeleteDocuments(DocumentsDeletionAggregator), - AggregateUpdateDocuments(DocumentsAggregator), - AggregateEditDocumentsByFunction(EditDocumentsByFunctionAggregator), - AggregateGetFetchDocuments(DocumentsFetchAggregator), - AggregatePostFetchDocuments(DocumentsFetchAggregator), -} - -pub struct SegmentAnalytics { - instance_uid: InstanceUid, - sender: Sender, - user: User, -} - -impl SegmentAnalytics { - #[allow(clippy::new_ret_no_self)] - pub async fn new( - opt: &Opt, - index_scheduler: Arc, - auth_controller: Arc, - ) -> Arc { - let instance_uid = super::find_user_id(&opt.db_path); - let first_time_run = instance_uid.is_none(); - let instance_uid = 
instance_uid.unwrap_or_else(Uuid::new_v4); - write_user_id(&opt.db_path, &instance_uid); - - let client = reqwest::Client::builder().connect_timeout(Duration::from_secs(10)).build(); - - // if reqwest throws an error we won't be able to send analytics - if client.is_err() { - return super::MockAnalytics::new(opt); - } - - let client = - HttpClient::new(client.unwrap(), "https://telemetry.meilisearch.com".to_string()); - let user = User::UserId { user_id: instance_uid.to_string() }; - let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string()); - - // If Meilisearch is Launched for the first time: - // 1. Send an event Launched associated to the user `total_launch`. - // 2. Batch an event Launched with the real instance-id and send it in one hour. - if first_time_run { - let _ = batcher - .push(Track { - user: User::UserId { user_id: "total_launch".to_string() }, - event: "Launched".to_string(), - ..Default::default() - }) - .await; - let _ = batcher.flush().await; - let _ = batcher - .push(Track { - user: user.clone(), - event: "Launched".to_string(), - ..Default::default() - }) - .await; - } - - let (sender, inbox) = mpsc::channel(100); // How many analytics can we bufferize - - let segment = Box::new(Segment { - inbox, - user: user.clone(), - opt: opt.clone(), - batcher, - post_search_aggregator: SearchAggregator::default(), - post_multi_search_aggregator: MultiSearchAggregator::default(), - post_facet_search_aggregator: FacetSearchAggregator::default(), - get_search_aggregator: SearchAggregator::default(), - add_documents_aggregator: DocumentsAggregator::default(), - delete_documents_aggregator: DocumentsDeletionAggregator::default(), - update_documents_aggregator: DocumentsAggregator::default(), - edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator::default(), - get_fetch_documents_aggregator: DocumentsFetchAggregator::default(), - post_fetch_documents_aggregator: DocumentsFetchAggregator::default(), - get_similar_aggregator: SimilarAggregator::default(), - post_similar_aggregator: SimilarAggregator::default(), - }); - tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone())); - - let this = Self { instance_uid, sender, user: user.clone() }; - - Arc::new(this) - } -} - -impl super::Analytics for SegmentAnalytics { - fn instance_uid(&self) -> Option<&InstanceUid> { - Some(&self.instance_uid) - } - - fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) { - let user_agent = request.map(extract_user_agents); - - send["user-agent"] = json!(user_agent); - let event = Track { - user: self.user.clone(), - event: event_name.clone(), - properties: send, - ..Default::default() - }; - let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event)); - } - - fn get_search(&self, aggregate: SearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate)); - } - - fn post_search(&self, aggregate: SearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); - } - - fn get_similar(&self, aggregate: SimilarAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSimilar(aggregate)); - } - - fn post_similar(&self, aggregate: SimilarAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSimilar(aggregate)); - } - - fn post_facet_search(&self, aggregate: FacetSearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFacetSearch(aggregate)); - } - - fn post_multi_search(&self, aggregate: 
MultiSearchAggregator) { - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate)); - } - - fn add_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateAddDocuments(aggregate)); - } - - fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest) { - let aggregate = DocumentsDeletionAggregator::from_query(kind, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateDeleteDocuments(aggregate)); - } - - fn update_documents( - &self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); - } - - fn update_documents_by_function( - &self, - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ) { - let aggregate = - EditDocumentsByFunctionAggregator::from_query(documents_query, index_creation, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateEditDocumentsByFunction(aggregate)); - } - - fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { - let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate)); - } - - fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { - let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); - let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate)); - } -} - -/// This structure represent the `infos` field we send in the analytics. -/// It's quite close to the `Opt` structure except all sensitive informations -/// have been simplified to a boolean. -/// It's send as-is in amplitude thus you should never update a name of the -/// struct without the approval of the PM. -#[derive(Debug, Clone, Serialize)] -struct Infos { - env: String, - experimental_contains_filter: bool, - experimental_enable_metrics: bool, - experimental_search_queue_size: usize, - experimental_logs_mode: LogMode, - experimental_replication_parameters: bool, - experimental_enable_logs_route: bool, - experimental_reduce_indexing_memory_usage: bool, - experimental_max_number_of_batched_tasks: usize, - gpu_enabled: bool, - db_path: bool, - import_dump: bool, - dump_dir: bool, - ignore_missing_dump: bool, - ignore_dump_if_db_exists: bool, - import_snapshot: bool, - schedule_snapshot: Option, - snapshot_dir: bool, - ignore_missing_snapshot: bool, - ignore_snapshot_if_db_exists: bool, - http_addr: bool, - http_payload_size_limit: Byte, - task_queue_webhook: bool, - task_webhook_authorization_header: bool, - log_level: String, - max_indexing_memory: MaxMemory, - max_indexing_threads: MaxThreads, - with_configuration_file: bool, - ssl_auth_path: bool, - ssl_cert_path: bool, - ssl_key_path: bool, - ssl_ocsp_path: bool, - ssl_require_auth: bool, - ssl_resumption: bool, - ssl_tickets: bool, -} - -impl From for Infos { - fn from(options: Opt) -> Self { - // We wants to decompose this whole struct by hand to be sure we don't forget - // to add analytics when we add a field in the Opt. - // Thus we must not insert `..` at the end. 
- let Opt { - db_path, - experimental_contains_filter, - experimental_enable_metrics, - experimental_search_queue_size, - experimental_logs_mode, - experimental_replication_parameters, - experimental_enable_logs_route, - experimental_reduce_indexing_memory_usage, - experimental_max_number_of_batched_tasks, - http_addr, - master_key: _, - env, - task_webhook_url, - task_webhook_authorization_header, - max_index_size: _, - max_task_db_size: _, - http_payload_size_limit, - ssl_cert_path, - ssl_key_path, - ssl_auth_path, - ssl_ocsp_path, - ssl_require_auth, - ssl_resumption, - ssl_tickets, - import_snapshot, - ignore_missing_snapshot, - ignore_snapshot_if_db_exists, - snapshot_dir, - schedule_snapshot, - import_dump, - ignore_missing_dump, - ignore_dump_if_db_exists, - dump_dir, - log_level, - indexer_options, - config_file_path, - #[cfg(feature = "analytics")] - no_analytics: _, - } = options; - - let schedule_snapshot = match schedule_snapshot { - ScheduleSnapshot::Disabled => None, - ScheduleSnapshot::Enabled(interval) => Some(interval), - }; - - let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = - indexer_options; - - // We're going to override every sensible information. - // We consider information sensible if it contains a path, an address, or a key. - Self { - env, - experimental_contains_filter, - experimental_enable_metrics, - experimental_search_queue_size, - experimental_logs_mode, - experimental_replication_parameters, - experimental_enable_logs_route, - experimental_reduce_indexing_memory_usage, - gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(), - db_path: db_path != PathBuf::from("./data.ms"), - import_dump: import_dump.is_some(), - dump_dir: dump_dir != PathBuf::from("dumps/"), - ignore_missing_dump, - ignore_dump_if_db_exists, - import_snapshot: import_snapshot.is_some(), - schedule_snapshot, - snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"), - ignore_missing_snapshot, - ignore_snapshot_if_db_exists, - http_addr: http_addr != default_http_addr(), - http_payload_size_limit, - experimental_max_number_of_batched_tasks, - task_queue_webhook: task_webhook_url.is_some(), - task_webhook_authorization_header: task_webhook_authorization_header.is_some(), - log_level: log_level.to_string(), - max_indexing_memory, - max_indexing_threads, - with_configuration_file: config_file_path.is_some(), - ssl_auth_path: ssl_auth_path.is_some(), - ssl_cert_path: ssl_cert_path.is_some(), - ssl_key_path: ssl_key_path.is_some(), - ssl_ocsp_path: ssl_ocsp_path.is_some(), - ssl_require_auth, - ssl_resumption, - ssl_tickets, - } - } -} - -pub struct Segment { - inbox: Receiver, - user: User, - opt: Opt, - batcher: AutoBatcher, - get_search_aggregator: SearchAggregator, - post_search_aggregator: SearchAggregator, - post_multi_search_aggregator: MultiSearchAggregator, - post_facet_search_aggregator: FacetSearchAggregator, - add_documents_aggregator: DocumentsAggregator, - delete_documents_aggregator: DocumentsDeletionAggregator, - update_documents_aggregator: DocumentsAggregator, - edit_documents_by_function_aggregator: EditDocumentsByFunctionAggregator, - get_fetch_documents_aggregator: DocumentsFetchAggregator, - post_fetch_documents_aggregator: DocumentsFetchAggregator, - get_similar_aggregator: SimilarAggregator, - post_similar_aggregator: SimilarAggregator, -} - -impl Segment { - fn compute_traits(opt: &Opt, stats: Stats) -> Value { - static FIRST_START_TIMESTAMP: Lazy = Lazy::new(Instant::now); - static SYSTEM: Lazy = Lazy::new(|| { - let 
disks = Disks::new_with_refreshed_list(); - let mut sys = System::new_all(); - sys.refresh_all(); - let kernel_version = System::kernel_version() - .and_then(|k| k.split_once('-').map(|(k, _)| k.to_string())); - json!({ - "distribution": System::name(), - "kernel_version": kernel_version, - "cores": sys.cpus().len(), - "ram_size": sys.total_memory(), - "disk_size": disks.iter().map(|disk| disk.total_space()).max(), - "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), - }) - }); - let number_of_documents = - stats.indexes.values().map(|index| index.number_of_documents).collect::>(); - - json!({ - "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day - "system": *SYSTEM, - "stats": { - "database_size": stats.database_size, - "indexes_number": stats.indexes.len(), - "documents_number": number_of_documents, - }, - "infos": Infos::from(opt.clone()), - }) - } - - async fn run( - mut self, - index_scheduler: Arc, - auth_controller: Arc, - ) { - const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour - // The first batch must be sent after one hour. - let mut interval = - tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL); - - loop { - select! { - _ = interval.tick() => { - self.tick(index_scheduler.clone(), auth_controller.clone()).await; - }, - msg = self.inbox.recv() => { - match msg { - Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await), - Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostFacetSearch(agreg)) => self.post_facet_search_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateEditDocumentsByFunction(agreg)) => self.edit_documents_by_function_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregateGetSimilar(agreg)) => self.get_similar_aggregator.aggregate(agreg), - Some(AnalyticsMsg::AggregatePostSimilar(agreg)) => self.post_similar_aggregator.aggregate(agreg), - None => (), - } - } - } - } - } - - async fn tick( - &mut self, - index_scheduler: Arc, - auth_controller: Arc, - ) { - if let Ok(stats) = - create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default()) - { - // Replace the version number with the prototype name if any. 
- let version = if let Some(prototype) = build_info::DescribeResult::from_build() - .and_then(|describe| describe.as_prototype()) - { - prototype - } else { - env!("CARGO_PKG_VERSION") - }; - - let _ = self - .batcher - .push(Identify { - context: Some(json!({ - "app": { - "version": version.to_string(), - }, - })), - user: self.user.clone(), - traits: Self::compute_traits(&self.opt, stats), - ..Default::default() - }) - .await; - } - - let Segment { - inbox: _, - opt: _, - batcher: _, - user, - get_search_aggregator, - post_search_aggregator, - post_multi_search_aggregator, - post_facet_search_aggregator, - add_documents_aggregator, - delete_documents_aggregator, - update_documents_aggregator, - edit_documents_by_function_aggregator, - get_fetch_documents_aggregator, - post_fetch_documents_aggregator, - get_similar_aggregator, - post_similar_aggregator, - } = self; - - if let Some(get_search) = - take(get_search_aggregator).into_event(user, "Documents Searched GET") - { - let _ = self.batcher.push(get_search).await; - } - if let Some(post_search) = - take(post_search_aggregator).into_event(user, "Documents Searched POST") - { - let _ = self.batcher.push(post_search).await; - } - if let Some(post_multi_search) = take(post_multi_search_aggregator) - .into_event(user, "Documents Searched by Multi-Search POST") - { - let _ = self.batcher.push(post_multi_search).await; - } - if let Some(post_facet_search) = - take(post_facet_search_aggregator).into_event(user, "Facet Searched POST") - { - let _ = self.batcher.push(post_facet_search).await; - } - if let Some(add_documents) = - take(add_documents_aggregator).into_event(user, "Documents Added") - { - let _ = self.batcher.push(add_documents).await; - } - if let Some(delete_documents) = - take(delete_documents_aggregator).into_event(user, "Documents Deleted") - { - let _ = self.batcher.push(delete_documents).await; - } - if let Some(update_documents) = - take(update_documents_aggregator).into_event(user, "Documents Updated") - { - let _ = self.batcher.push(update_documents).await; - } - if let Some(edit_documents_by_function) = take(edit_documents_by_function_aggregator) - .into_event(user, "Documents Edited By Function") - { - let _ = self.batcher.push(edit_documents_by_function).await; - } - if let Some(get_fetch_documents) = - take(get_fetch_documents_aggregator).into_event(user, "Documents Fetched GET") - { - let _ = self.batcher.push(get_fetch_documents).await; - } - if let Some(post_fetch_documents) = - take(post_fetch_documents_aggregator).into_event(user, "Documents Fetched POST") - { - let _ = self.batcher.push(post_fetch_documents).await; - } - - if let Some(get_similar_documents) = - take(get_similar_aggregator).into_event(user, "Similar GET") - { - let _ = self.batcher.push(get_similar_documents).await; - } - - if let Some(post_similar_documents) = - take(post_similar_aggregator).into_event(user, "Similar POST") - { - let _ = self.batcher.push(post_similar_documents).await; - } - let _ = self.batcher.flush().await; - } -} - -#[derive(Default)] -pub struct SearchAggregator { - timestamp: Option, - - // context - user_agents: HashSet, - - // requests - total_received: usize, - total_succeeded: usize, - total_degraded: usize, - total_used_negative_operator: usize, - time_spent: BinaryHeap, - - // sort - sort_with_geo_point: bool, - // every time a request has a filter, this field must be incremented by the number of terms it contains - sort_sum_of_criteria_terms: usize, - // every time a request has a filter, this field must be incremented 
by one - sort_total_number_of_criteria: usize, - - // distinct - distinct: bool, - - // filter - filter_with_geo_radius: bool, - filter_with_geo_bounding_box: bool, - // every time a request has a filter, this field must be incremented by the number of terms it contains - filter_sum_of_criteria_terms: usize, - // every time a request has a filter, this field must be incremented by one - filter_total_number_of_criteria: usize, - used_syntax: HashMap, - - // attributes_to_search_on - // every time a search is done using attributes_to_search_on - attributes_to_search_on_total_number_of_uses: usize, - - // q - // The maximum number of terms in a q request - max_terms_number: usize, - - // vector - // The maximum number of floats in a vector request - max_vector_size: usize, - // Whether the semantic ratio passed to a hybrid search equals the default ratio. - semantic_ratio: bool, - hybrid: bool, - retrieve_vectors: bool, - - // every time a search is done, we increment the counter linked to the used settings - matching_strategy: HashMap, - - // List of the unique Locales passed as parameter - locales: BTreeSet, - - // pagination - max_limit: usize, - max_offset: usize, - finite_pagination: usize, - - // formatting - max_attributes_to_retrieve: usize, - max_attributes_to_highlight: usize, - highlight_pre_tag: bool, - highlight_post_tag: bool, - max_attributes_to_crop: usize, - crop_marker: bool, - show_matches_position: bool, - crop_length: bool, - - // facets - facets_sum_of_terms: usize, - facets_total_number_of_facets: usize, - - // scoring - show_ranking_score: bool, - show_ranking_score_details: bool, - ranking_score_threshold: bool, -} - -impl SearchAggregator { - #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self { - let SearchQuery { - q, - vector, - offset, - limit, - page, - hits_per_page, - attributes_to_retrieve: _, - retrieve_vectors, - attributes_to_crop: _, - crop_length, - attributes_to_highlight: _, - show_matches_position, - show_ranking_score, - show_ranking_score_details, - filter, - sort, - distinct, - facets: _, - highlight_pre_tag, - highlight_post_tag, - crop_marker, - matching_strategy, - attributes_to_search_on, - hybrid, - ranking_score_threshold, - locales, - } = query; - - let mut ret = Self::default(); - ret.timestamp = Some(OffsetDateTime::now_utc()); - - ret.total_received = 1; - ret.user_agents = extract_user_agents(request).into_iter().collect(); - - if let Some(ref sort) = sort { - ret.sort_total_number_of_criteria = 1; - ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint(")); - ret.sort_sum_of_criteria_terms = sort.len(); - } - - ret.distinct = distinct.is_some(); - - if let Some(ref filter) = filter { - static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); - ret.filter_total_number_of_criteria = 1; - - let syntax = match filter { - Value::String(_) => "string".to_string(), - Value::Array(values) => { - if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { - "mixed".to_string() - } else { - "array".to_string() - } - } - _ => "none".to_string(), - }; - // convert the string to a HashMap - ret.used_syntax.insert(syntax, 1); - - let stringified_filters = filter.to_string(); - ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); - ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); - ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); - } - - // attributes_to_search_on - if 
attributes_to_search_on.is_some() { - ret.attributes_to_search_on_total_number_of_uses = 1; - } - - if let Some(ref q) = q { - ret.max_terms_number = q.split_whitespace().count(); - } - - if let Some(ref vector) = vector { - ret.max_vector_size = vector.len(); - } - ret.retrieve_vectors |= retrieve_vectors; - - if query.is_finite_pagination() { - let limit = hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); - ret.max_limit = limit; - ret.max_offset = page.unwrap_or(1).saturating_sub(1) * limit; - ret.finite_pagination = 1; - } else { - ret.max_limit = *limit; - ret.max_offset = *offset; - ret.finite_pagination = 0; - } - - ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); - - if let Some(locales) = locales { - ret.locales = locales.iter().copied().collect(); - } - - ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); - ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); - ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER(); - ret.crop_length = *crop_length != DEFAULT_CROP_LENGTH(); - ret.show_matches_position = *show_matches_position; - - ret.show_ranking_score = *show_ranking_score; - ret.show_ranking_score_details = *show_ranking_score_details; - ret.ranking_score_threshold = ranking_score_threshold.is_some(); - - if let Some(hybrid) = hybrid { - ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); - ret.hybrid = true; - } - - ret - } - - pub fn succeed(&mut self, result: &SearchResult) { - let SearchResult { - hits: _, - query: _, - processing_time_ms, - hits_info: _, - semantic_hit_count: _, - facet_distribution: _, - facet_stats: _, - degraded, - used_negative_operator, - } = result; - - self.total_succeeded = self.total_succeeded.saturating_add(1); - if *degraded { - self.total_degraded = self.total_degraded.saturating_add(1); - } - if *used_negative_operator { - self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1); - } - self.time_spent.push(*processing_time_ms as usize); - } - - /// Aggregate one [SearchAggregator] into another. 
- pub fn aggregate(&mut self, mut other: Self) { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - ref mut time_spent, - sort_with_geo_point, - sort_sum_of_criteria_terms, - sort_total_number_of_criteria, - distinct, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - attributes_to_search_on_total_number_of_uses, - max_terms_number, - max_vector_size, - retrieve_vectors, - matching_strategy, - max_limit, - max_offset, - finite_pagination, - max_attributes_to_retrieve, - max_attributes_to_highlight, - highlight_pre_tag, - highlight_post_tag, - max_attributes_to_crop, - crop_marker, - show_matches_position, - crop_length, - facets_sum_of_terms, - facets_total_number_of_facets, - show_ranking_score, - show_ranking_score_details, - semantic_ratio, - hybrid, - total_degraded, - total_used_negative_operator, - ranking_score_threshold, - ref mut locales, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - - // request - self.total_received = self.total_received.saturating_add(total_received); - self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); - self.total_degraded = self.total_degraded.saturating_add(total_degraded); - self.total_used_negative_operator = - self.total_used_negative_operator.saturating_add(total_used_negative_operator); - self.time_spent.append(time_spent); - - // sort - self.sort_with_geo_point |= sort_with_geo_point; - self.sort_sum_of_criteria_terms = - self.sort_sum_of_criteria_terms.saturating_add(sort_sum_of_criteria_terms); - self.sort_total_number_of_criteria = - self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); - - // distinct - self.distinct |= distinct; - - // filter - self.filter_with_geo_radius |= filter_with_geo_radius; - self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; - self.filter_sum_of_criteria_terms = - self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); - self.filter_total_number_of_criteria = - self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); - for (key, value) in used_syntax.into_iter() { - let used_syntax = self.used_syntax.entry(key).or_insert(0); - *used_syntax = used_syntax.saturating_add(value); - } - - // attributes_to_search_on - self.attributes_to_search_on_total_number_of_uses = self - .attributes_to_search_on_total_number_of_uses - .saturating_add(attributes_to_search_on_total_number_of_uses); - - // q - self.max_terms_number = self.max_terms_number.max(max_terms_number); - - // vector - self.max_vector_size = self.max_vector_size.max(max_vector_size); - self.retrieve_vectors |= retrieve_vectors; - self.semantic_ratio |= semantic_ratio; - self.hybrid |= hybrid; - - // pagination - self.max_limit = self.max_limit.max(max_limit); - self.max_offset = self.max_offset.max(max_offset); - self.finite_pagination += finite_pagination; - - // formatting - self.max_attributes_to_retrieve = - self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); - self.max_attributes_to_highlight = - self.max_attributes_to_highlight.max(max_attributes_to_highlight); - self.highlight_pre_tag |= highlight_pre_tag; - self.highlight_post_tag |= highlight_post_tag; - self.max_attributes_to_crop = self.max_attributes_to_crop.max(max_attributes_to_crop); - self.crop_marker |= 
crop_marker; - self.show_matches_position |= show_matches_position; - self.crop_length |= crop_length; - - // facets - self.facets_sum_of_terms = self.facets_sum_of_terms.saturating_add(facets_sum_of_terms); - self.facets_total_number_of_facets = - self.facets_total_number_of_facets.saturating_add(facets_total_number_of_facets); - - // matching strategy - for (key, value) in matching_strategy.into_iter() { - let matching_strategy = self.matching_strategy.entry(key).or_insert(0); - *matching_strategy = matching_strategy.saturating_add(value); - } - - // scoring - self.show_ranking_score |= show_ranking_score; - self.show_ranking_score_details |= show_ranking_score_details; - self.ranking_score_threshold |= ranking_score_threshold; - - // locales - self.locales.append(locales); - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - time_spent, - sort_with_geo_point, - sort_sum_of_criteria_terms, - sort_total_number_of_criteria, - distinct, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - attributes_to_search_on_total_number_of_uses, - max_terms_number, - max_vector_size, - retrieve_vectors, - matching_strategy, - max_limit, - max_offset, - finite_pagination, - max_attributes_to_retrieve, - max_attributes_to_highlight, - highlight_pre_tag, - highlight_post_tag, - max_attributes_to_crop, - crop_marker, - show_matches_position, - crop_length, - facets_sum_of_terms, - facets_total_number_of_facets, - show_ranking_score, - show_ranking_score_details, - semantic_ratio, - hybrid, - total_degraded, - total_used_negative_operator, - ranking_score_threshold, - locales, - } = self; - - if total_received == 0 { - None - } else { - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentage of value - let percentile_99th = time_spent.len() * 99 / 100; - // We are only interested by the slowest value of the 99th fastest results - let time_spent = time_spent.get(percentile_99th); - - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics - "total_received": total_received, - "total_degraded": total_degraded, - "total_used_negative_operator": total_used_negative_operator, - }, - "sort": { - "with_geoPoint": sort_with_geo_point, - "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), - }, - "distinct": distinct, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "attributes_to_search_on": { - "total_number_of_uses": attributes_to_search_on_total_number_of_uses, - }, - "q": { - "max_terms_number": max_terms_number, - }, - "vector": { - "max_vector_size": max_vector_size, - "retrieve_vectors": retrieve_vectors, - }, - "hybrid": { - "enabled": hybrid, - "semantic_ratio": semantic_ratio, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - 
"most_used_navigation": if finite_pagination > (total_received / 2) { "exhaustive" } else { "estimated" }, - }, - "formatting": { - "max_attributes_to_retrieve": max_attributes_to_retrieve, - "max_attributes_to_highlight": max_attributes_to_highlight, - "highlight_pre_tag": highlight_pre_tag, - "highlight_post_tag": highlight_post_tag, - "max_attributes_to_crop": max_attributes_to_crop, - "crop_marker": crop_marker, - "show_matches_position": show_matches_position, - "crop_length": crop_length, - }, - "facets": { - "avg_facets_number": format!("{:.2}", facets_sum_of_terms as f64 / facets_total_number_of_facets as f64), - }, - "matching_strategy": { - "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "locales": locales, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } - } -} - -#[derive(Default)] -pub struct MultiSearchAggregator { - timestamp: Option, - - // requests - total_received: usize, - total_succeeded: usize, - - // sum of the number of distinct indexes in each single request, use with total_received to compute an avg - total_distinct_index_count: usize, - // number of queries with a single index, use with total_received to compute a proportion - total_single_index: usize, - - // sum of the number of search queries in the requests, use with total_received to compute an average - total_search_count: usize, - - // scoring - show_ranking_score: bool, - show_ranking_score_details: bool, - - // federation - use_federation: bool, - - // context - user_agents: HashSet, -} - -impl MultiSearchAggregator { - pub fn from_federated_search( - federated_search: &FederatedSearch, - request: &HttpRequest, - ) -> Self { - let timestamp = Some(OffsetDateTime::now_utc()); - - let user_agents = extract_user_agents(request).into_iter().collect(); - - let use_federation = federated_search.federation.is_some(); - - let distinct_indexes: HashSet<_> = federated_search - .queries - .iter() - .map(|query| { - let query = &query; - // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex - let SearchQueryWithIndex { - index_uid, - federation_options: _, - q: _, - vector: _, - offset: _, - limit: _, - page: _, - hits_per_page: _, - attributes_to_retrieve: _, - retrieve_vectors: _, - attributes_to_crop: _, - crop_length: _, - attributes_to_highlight: _, - show_ranking_score: _, - show_ranking_score_details: _, - show_matches_position: _, - filter: _, - sort: _, - distinct: _, - facets: _, - highlight_pre_tag: _, - highlight_post_tag: _, - crop_marker: _, - matching_strategy: _, - attributes_to_search_on: _, - hybrid: _, - ranking_score_threshold: _, - locales: _, - } = query; - - index_uid.as_str() - }) - .collect(); - - let show_ranking_score = - federated_search.queries.iter().any(|query| query.show_ranking_score); - let show_ranking_score_details = - federated_search.queries.iter().any(|query| query.show_ranking_score_details); - - Self { - timestamp, - total_received: 1, - total_succeeded: 0, - total_distinct_index_count: distinct_indexes.len(), - total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, - total_search_count: federated_search.queries.len(), - show_ranking_score, - show_ranking_score_details, - 
user_agents, - use_federation, - } - } - - pub fn succeed(&mut self) { - self.total_succeeded = self.total_succeeded.saturating_add(1); - } - - /// Aggregate one [MultiSearchAggregator] into another. - pub fn aggregate(&mut self, other: Self) { - // write the aggregate in a way that will cause a compilation error if a field is added. - - // get ownership of self, replacing it by a default value. - let this = std::mem::take(self); - - let timestamp = this.timestamp.or(other.timestamp); - let total_received = this.total_received.saturating_add(other.total_received); - let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded); - let total_distinct_index_count = - this.total_distinct_index_count.saturating_add(other.total_distinct_index_count); - let total_single_index = this.total_single_index.saturating_add(other.total_single_index); - let total_search_count = this.total_search_count.saturating_add(other.total_search_count); - let show_ranking_score = this.show_ranking_score || other.show_ranking_score; - let show_ranking_score_details = - this.show_ranking_score_details || other.show_ranking_score_details; - let mut user_agents = this.user_agents; - let use_federation = this.use_federation || other.use_federation; - - for user_agent in other.user_agents.into_iter() { - user_agents.insert(user_agent); - } - - // need all fields or compile error - let mut aggregated = Self { - timestamp, - total_received, - total_succeeded, - total_distinct_index_count, - total_single_index, - total_search_count, - user_agents, - show_ranking_score, - show_ranking_score_details, - use_federation, - // do not add _ or ..Default::default() here - }; - - // replace the default self with the aggregated value - std::mem::swap(self, &mut aggregated); - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { - let Self { - timestamp, - total_received, - total_succeeded, - total_distinct_index_count, - total_single_index, - total_search_count, - user_agents, - show_ranking_score, - show_ranking_score_details, - use_federation, - } = self; - - if total_received == 0 { - None - } else { - let properties = json!({ - "user-agent": user_agents, - "requests": { - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic - "total_received": total_received, - }, - "indexes": { - "total_single_index": total_single_index, - "total_distinct_index_count": total_distinct_index_count, - "avg_distinct_index_count": (total_distinct_index_count as f64) / (total_received as f64), // not 0, otherwise we returned early - }, - "searches": { - "total_search_count": total_search_count, - "avg_search_count": (total_search_count as f64) / (total_received as f64), - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - }, - "federation": { - "use_federation": use_federation, - } - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } - } -}
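// Illustrative sketch: aggregate() above combines `std::mem::take` with an exhaustive
// struct literal so that adding a field to the aggregator without aggregating it becomes
// a compile error. A minimal, self-contained version of the same pattern; the `Stats`
// type and its fields are hypothetical, not Meilisearch code:
#[derive(Default)]
struct Stats {
    total_received: usize,
    use_federation: bool,
}

impl Stats {
    fn aggregate(&mut self, other: Self) {
        // Take ownership of self, leaving Stats::default() behind.
        let this = std::mem::take(self);
        // No `..Default::default()` here: a new field that is not listed below
        // makes this struct literal fail to compile, which is the point.
        let mut aggregated = Self {
            total_received: this.total_received.saturating_add(other.total_received),
            use_federation: this.use_federation || other.use_federation,
        };
        std::mem::swap(self, &mut aggregated);
    }
}

fn main() {
    let mut total = Stats { total_received: 1, use_federation: false };
    total.aggregate(Stats { total_received: 2, use_federation: true });
    assert_eq!(total.total_received, 3);
    assert!(total.use_federation);
}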
- -#[derive(Default)] -pub struct FacetSearchAggregator { - timestamp: Option<OffsetDateTime>, - - // context - user_agents: HashSet<String>, - - // requests - total_received: usize, - total_succeeded: usize, - time_spent: BinaryHeap<usize>, - - // The set of all facetNames that were used - facet_names: HashSet<String>, - - // Has any parameter other than facetName or facetQuery been provided? - additional_search_parameters_provided: bool, -} - -impl FacetSearchAggregator { - #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &FacetSearchQuery, request: &HttpRequest) -> Self { - let FacetSearchQuery { - facet_query: _, - facet_name, - vector, - q, - filter, - matching_strategy, - attributes_to_search_on, - hybrid, - ranking_score_threshold, - locales, - } = query; - - let mut ret = Self::default(); - ret.timestamp = Some(OffsetDateTime::now_utc()); - - ret.total_received = 1; - ret.user_agents = extract_user_agents(request).into_iter().collect(); - ret.facet_names = Some(facet_name.clone()).into_iter().collect(); - - ret.additional_search_parameters_provided = q.is_some() - || vector.is_some() - || filter.is_some() - || *matching_strategy != MatchingStrategy::default() - || attributes_to_search_on.is_some() - || hybrid.is_some() - || ranking_score_threshold.is_some() - || locales.is_some(); - - ret - } - - pub fn succeed(&mut self, result: &FacetSearchResult) { - let FacetSearchResult { facet_hits: _, facet_query: _, processing_time_ms } = result; - self.total_succeeded = self.total_succeeded.saturating_add(1); - self.time_spent.push(*processing_time_ms as usize); - } - - /// Aggregate one [FacetSearchAggregator] into another. - pub fn aggregate(&mut self, mut other: Self) { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - ref mut time_spent, - facet_names, - additional_search_parameters_provided, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - - // request - self.total_received = self.total_received.saturating_add(total_received); - self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); - self.time_spent.append(time_spent); - - // facet_names - for facet_name in facet_names.into_iter() { - self.facet_names.insert(facet_name); - } - - // additional_search_parameters_provided - self.additional_search_parameters_provided |= additional_search_parameters_provided; - }
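// Illustrative sketch: into_event() below derives an approximate 99th percentile from
// the `BinaryHeap` of response times. A standalone, slightly simplified integer variant
// of that computation; the sample data is made up for illustration:
use std::collections::BinaryHeap;

fn response_time_99th(times: BinaryHeap<usize>) -> Option<usize> {
    let len = times.len();
    // into_sorted_vec() returns the values in ascending order.
    let sorted = times.into_sorted_vec();
    // Index of the slowest value among the 99% fastest responses.
    let index = len.saturating_sub(1) * 99 / 100;
    sorted.get(index).copied()
}

fn main() {
    let times: BinaryHeap<usize> = (1..=200).collect();
    // (200 - 1) * 99 / 100 == 197, so the value at index 197 is 198.
    assert_eq!(response_time_99th(times), Some(198));
}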
- - pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - time_spent, - facet_names, - additional_search_parameters_provided, - } = self; - - if total_received == 0 { - None - } else { - // the index of the 99th percentile value - let percentile_99th = 0.99 * (total_succeeded as f64 - 1.) + 1.; - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // we are only interested in the slowest value of the 99% fastest results - let time_spent = time_spent.get(percentile_99th as usize); - - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic - "total_received": total_received, - }, - "facets": { - "total_distinct_facet_count": facet_names.len(), - "additional_search_parameters_provided": additional_search_parameters_provided, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } - } -} - -#[derive(Default)] -pub struct DocumentsAggregator { - timestamp: Option<OffsetDateTime>, - - // set to true when at least one request was received - updated: bool, - - // context - user_agents: HashSet<String>, - - content_types: HashSet<String>, - primary_keys: HashSet<String>, - index_creation: bool, -} - -impl DocumentsAggregator { - pub fn from_query( - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) -> Self { - let UpdateDocumentsQuery { primary_key, csv_delimiter: _ } = documents_query; - - let mut primary_keys = HashSet::new(); - if let Some(primary_key) = primary_key.clone() { - primary_keys.insert(primary_key); - } - - let mut content_types = HashSet::new(); - let content_type = request - .headers() - .get(CONTENT_TYPE) - .and_then(|s| s.to_str().ok()) - .unwrap_or("unknown") - .to_string(); - content_types.insert(content_type); - - Self { - timestamp: Some(OffsetDateTime::now_utc()), - updated: true, - user_agents: extract_user_agents(request).into_iter().collect(), - content_types, - primary_keys, - index_creation, - } - }
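// Illustrative sketch: from_query() above records the request's Content-Type header,
// falling back to "unknown" when it is missing or not valid UTF-8. A hedged version of
// that extraction, assuming actix-web 4 and using its test utilities to build a
// synthetic request:
use actix_web::http::header::CONTENT_TYPE;
use actix_web::test::TestRequest;
use actix_web::HttpRequest;

fn content_type_of(request: &HttpRequest) -> String {
    request
        .headers()
        .get(CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .unwrap_or("unknown")
        .to_string()
}

fn main() {
    let request = TestRequest::default()
        .insert_header((CONTENT_TYPE, "application/x-ndjson"))
        .to_http_request();
    assert_eq!(content_type_of(&request), "application/x-ndjson");
}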
- - /// Aggregate one [DocumentsAggregator] into another. - pub fn aggregate(&mut self, other: Self) { - let Self { timestamp, user_agents, primary_keys, content_types, index_creation, updated } = - other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - self.updated |= updated; - // we can't create a union because there is no `into_union` method - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - for primary_key in primary_keys { - self.primary_keys.insert(primary_key); - } - for content_type in content_types { - self.content_types.insert(content_type); - } - self.index_creation |= index_creation; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { - let Self { timestamp, user_agents, primary_keys, content_types, index_creation, updated } = - self; - - if !updated { - None - } else { - let properties = json!({ - "user-agent": user_agents, - "payload_type": content_types, - "primary_key": primary_keys, - "index_creation": index_creation, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } - } -} - -#[derive(Default)] -pub struct EditDocumentsByFunctionAggregator { - timestamp: Option<OffsetDateTime>, - - // Set to true if at least one request was filtered - filtered: bool, - // Set to true if at least one request contained a context - with_context: bool, - - // context - user_agents: HashSet<String>, - - index_creation: bool, -} - -impl EditDocumentsByFunctionAggregator { - pub fn from_query( - documents_query: &DocumentEditionByFunction, - index_creation: bool, - request: &HttpRequest, - ) -> Self { - let DocumentEditionByFunction { filter, context, function: _ } = documents_query; - - Self { - timestamp: Some(OffsetDateTime::now_utc()), - user_agents: extract_user_agents(request).into_iter().collect(), - filtered: filter.is_some(), - with_context: context.is_some(), - index_creation, - } - } - - /// Aggregate one [EditDocumentsByFunctionAggregator] into another.
- pub fn aggregate(&mut self, other: Self) { - let Self { timestamp, user_agents, index_creation, filtered, with_context } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // we can't create a union because there is no `into_union` method - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - self.index_creation |= index_creation; - self.filtered |= filtered; - self.with_context |= with_context; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - let Self { timestamp, user_agents, index_creation, filtered, with_context } = self; - - let properties = json!({ - "user-agent": user_agents, - "filtered": filtered, - "with_context": with_context, - "index_creation": index_creation, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } -} - -#[derive(Default, Serialize)] -pub struct DocumentsDeletionAggregator { - #[serde(skip)] - timestamp: Option, - - // context - #[serde(rename = "user-agent")] - user_agents: HashSet, - - #[serde(rename = "requests.total_received")] - total_received: usize, - per_document_id: bool, - clear_all: bool, - per_batch: bool, - per_filter: bool, -} - -impl DocumentsDeletionAggregator { - pub fn from_query(kind: DocumentDeletionKind, request: &HttpRequest) -> Self { - Self { - timestamp: Some(OffsetDateTime::now_utc()), - user_agents: extract_user_agents(request).into_iter().collect(), - total_received: 1, - per_document_id: matches!(kind, DocumentDeletionKind::PerDocumentId), - clear_all: matches!(kind, DocumentDeletionKind::ClearAll), - per_batch: matches!(kind, DocumentDeletionKind::PerBatch), - per_filter: matches!(kind, DocumentDeletionKind::PerFilter), - } - } - - /// Aggregate one [DocumentsAggregator] into another. - pub fn aggregate(&mut self, other: Self) { - let Self { - timestamp, - user_agents, - total_received, - per_document_id, - clear_all, - per_batch, - per_filter, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // we can't create a union because there is no `into_union` method - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - self.total_received = self.total_received.saturating_add(total_received); - self.per_document_id |= per_document_id; - self.clear_all |= clear_all; - self.per_batch |= per_batch; - self.per_filter |= per_filter; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - // if we had no timestamp it means we never encountered any events and - // thus we don't need to send this event. 
- let timestamp = self.timestamp?; - - Some(Track { - timestamp: Some(timestamp), - user: user.clone(), - event: event_name.to_string(), - properties: serde_json::to_value(self).ok()?, - ..Default::default() - }) - } -} - -#[derive(Default, Serialize)] -pub struct DocumentsFetchAggregator { - #[serde(skip)] - timestamp: Option, - - // context - #[serde(rename = "user-agent")] - user_agents: HashSet, - - #[serde(rename = "requests.total_received")] - total_received: usize, - - // a call on ../documents/:doc_id - per_document_id: bool, - // if a filter was used - per_filter: bool, - - #[serde(rename = "vector.retrieve_vectors")] - retrieve_vectors: bool, - - // pagination - #[serde(rename = "pagination.max_limit")] - max_limit: usize, - #[serde(rename = "pagination.max_offset")] - max_offset: usize, -} - -impl DocumentsFetchAggregator { - pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self { - let (limit, offset, retrieve_vectors) = match query { - DocumentFetchKind::PerDocumentId { retrieve_vectors } => (1, 0, *retrieve_vectors), - DocumentFetchKind::Normal { limit, offset, retrieve_vectors, .. } => { - (*limit, *offset, *retrieve_vectors) - } - }; - Self { - timestamp: Some(OffsetDateTime::now_utc()), - user_agents: extract_user_agents(request).into_iter().collect(), - total_received: 1, - per_document_id: matches!(query, DocumentFetchKind::PerDocumentId { .. }), - per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter), - max_limit: limit, - max_offset: offset, - retrieve_vectors, - } - } - - /// Aggregate one [DocumentsFetchAggregator] into another. - pub fn aggregate(&mut self, other: Self) { - let Self { - timestamp, - user_agents, - total_received, - per_document_id, - per_filter, - max_limit, - max_offset, - retrieve_vectors, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - for user_agent in user_agents { - self.user_agents.insert(user_agent); - } - - self.total_received = self.total_received.saturating_add(total_received); - self.per_document_id |= per_document_id; - self.per_filter |= per_filter; - - self.max_limit = self.max_limit.max(max_limit); - self.max_offset = self.max_offset.max(max_offset); - - self.retrieve_vectors |= retrieve_vectors; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option { - // if we had no timestamp it means we never encountered any events and - // thus we don't need to send this event. 
- let timestamp = self.timestamp?; - - Some(Track { - timestamp: Some(timestamp), - user: user.clone(), - event: event_name.to_string(), - properties: serde_json::to_value(self).ok()?, - ..Default::default() - }) - } -} - -#[derive(Default)] -pub struct SimilarAggregator { - timestamp: Option, - - // context - user_agents: HashSet, - - // requests - total_received: usize, - total_succeeded: usize, - time_spent: BinaryHeap, - - // filter - filter_with_geo_radius: bool, - filter_with_geo_bounding_box: bool, - // every time a request has a filter, this field must be incremented by the number of terms it contains - filter_sum_of_criteria_terms: usize, - // every time a request has a filter, this field must be incremented by one - filter_total_number_of_criteria: usize, - used_syntax: HashMap, - - // Whether a non-default embedder was specified - retrieve_vectors: bool, - - // pagination - max_limit: usize, - max_offset: usize, - - // formatting - max_attributes_to_retrieve: usize, - - // scoring - show_ranking_score: bool, - show_ranking_score_details: bool, - ranking_score_threshold: bool, -} - -impl SimilarAggregator { - #[allow(clippy::field_reassign_with_default)] - pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { - let SimilarQuery { - id: _, - embedder: _, - offset, - limit, - attributes_to_retrieve: _, - retrieve_vectors, - show_ranking_score, - show_ranking_score_details, - filter, - ranking_score_threshold, - } = query; - - let mut ret = Self::default(); - ret.timestamp = Some(OffsetDateTime::now_utc()); - - ret.total_received = 1; - ret.user_agents = extract_user_agents(request).into_iter().collect(); - - if let Some(ref filter) = filter { - static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); - ret.filter_total_number_of_criteria = 1; - - let syntax = match filter { - Value::String(_) => "string".to_string(), - Value::Array(values) => { - if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { - "mixed".to_string() - } else { - "array".to_string() - } - } - _ => "none".to_string(), - }; - // convert the string to a HashMap - ret.used_syntax.insert(syntax, 1); - - let stringified_filters = filter.to_string(); - ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); - ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox("); - ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); - } - - ret.max_limit = *limit; - ret.max_offset = *offset; - - ret.show_ranking_score = *show_ranking_score; - ret.show_ranking_score_details = *show_ranking_score_details; - ret.ranking_score_threshold = ranking_score_threshold.is_some(); - - ret.retrieve_vectors = *retrieve_vectors; - - ret - } - - pub fn succeed(&mut self, result: &SimilarResult) { - let SimilarResult { id: _, hits: _, processing_time_ms, hits_info: _ } = result; - - self.total_succeeded = self.total_succeeded.saturating_add(1); - - self.time_spent.push(*processing_time_ms as usize); - } - - /// Aggregate one [SimilarAggregator] into another. 
- pub fn aggregate(&mut self, mut other: Self) { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - ref mut time_spent, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - max_limit, - max_offset, - max_attributes_to_retrieve, - show_ranking_score, - show_ranking_score_details, - ranking_score_threshold, - retrieve_vectors, - } = other; - - if self.timestamp.is_none() { - self.timestamp = timestamp; - } - - // context - for user_agent in user_agents.into_iter() { - self.user_agents.insert(user_agent); - } - - // request - self.total_received = self.total_received.saturating_add(total_received); - self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); - self.time_spent.append(time_spent); - - // filter - self.filter_with_geo_radius |= filter_with_geo_radius; - self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; - self.filter_sum_of_criteria_terms = - self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms); - self.filter_total_number_of_criteria = - self.filter_total_number_of_criteria.saturating_add(filter_total_number_of_criteria); - for (key, value) in used_syntax.into_iter() { - let used_syntax = self.used_syntax.entry(key).or_insert(0); - *used_syntax = used_syntax.saturating_add(value); - } - - self.retrieve_vectors |= retrieve_vectors; - - // pagination - self.max_limit = self.max_limit.max(max_limit); - self.max_offset = self.max_offset.max(max_offset); - - // formatting - self.max_attributes_to_retrieve = - self.max_attributes_to_retrieve.max(max_attributes_to_retrieve); - - // scoring - self.show_ranking_score |= show_ranking_score; - self.show_ranking_score_details |= show_ranking_score_details; - self.ranking_score_threshold |= ranking_score_threshold; - } - - pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { - let Self { - timestamp, - user_agents, - total_received, - total_succeeded, - time_spent, - filter_with_geo_radius, - filter_with_geo_bounding_box, - filter_sum_of_criteria_terms, - filter_total_number_of_criteria, - used_syntax, - max_limit, - max_offset, - max_attributes_to_retrieve, - show_ranking_score, - show_ranking_score_details, - ranking_score_threshold, - retrieve_vectors, - } = self; - - if total_received == 0 { - None - } else { - // we get all the values in a sorted manner - let time_spent = time_spent.into_sorted_vec(); - // the index of the 99th percentile value - let percentile_99th = time_spent.len() * 99 / 100; - // we are only interested in the slowest value of the 99% fastest results - let time_spent = time_spent.get(percentile_99th); - - let properties = json!({ - "user-agent": user_agents, - "requests": { - "99th_response_time": time_spent.map(|t| format!("{:.2}", t)), - "total_succeeded": total_succeeded, - "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panic - "total_received": total_received, - }, - "filter": { - "with_geoRadius": filter_with_geo_radius, - "with_geoBoundingBox": filter_with_geo_bounding_box, - "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64), - "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "vector": { - "retrieve_vectors": retrieve_vectors, - }, - "pagination": { - "max_limit": max_limit, - "max_offset": max_offset, - }, - "formatting": { -
"max_attributes_to_retrieve": max_attributes_to_retrieve, - }, - "scoring": { - "show_ranking_score": show_ranking_score, - "show_ranking_score_details": show_ranking_score_details, - "ranking_score_threshold": ranking_score_threshold, - }, - }); - - Some(Track { - timestamp, - user: user.clone(), - event: event_name.to_string(), - properties, - ..Default::default() - }) - } - } -} diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs deleted file mode 100644 index aaf8673d0..000000000 --- a/meilisearch/src/routes/indexes/settings.rs +++ /dev/null @@ -1,901 +0,0 @@ -use actix_web::web::Data; -use actix_web::{web, HttpRequest, HttpResponse}; -use deserr::actix_web::AwebJson; -use index_scheduler::IndexScheduler; -use meilisearch_types::deserr::DeserrJsonError; -use meilisearch_types::error::ResponseError; -use meilisearch_types::facet_values_sort::FacetValuesSort; -use meilisearch_types::index_uid::IndexUid; -use meilisearch_types::milli::update::Setting; -use meilisearch_types::settings::{settings, RankingRuleView, SecretPolicy, Settings, Unchecked}; -use meilisearch_types::tasks::KindWithContent; -use serde_json::json; -use tracing::debug; - -use crate::analytics::Analytics; -use crate::extractors::authentication::policies::*; -use crate::extractors::authentication::GuardedData; -use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; -use crate::Opt; - -#[macro_export] -macro_rules! make_setting_route { - ($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => { - pub mod $attr { - use actix_web::web::Data; - use actix_web::{web, HttpRequest, HttpResponse, Resource}; - use index_scheduler::IndexScheduler; - use meilisearch_types::error::ResponseError; - use meilisearch_types::index_uid::IndexUid; - use meilisearch_types::milli::update::Setting; - use meilisearch_types::settings::{settings, Settings}; - use meilisearch_types::tasks::KindWithContent; - use tracing::debug; - use $crate::analytics::Analytics; - use $crate::extractors::authentication::policies::*; - use $crate::extractors::authentication::GuardedData; - use $crate::extractors::sequential_extractor::SeqHandler; - use $crate::Opt; - use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView}; - - pub async fn delete( - index_scheduler: GuardedData< - ActionPolicy<{ actions::SETTINGS_UPDATE }>, - Data, - >, - index_uid: web::Path, - req: HttpRequest, - opt: web::Data, - ) -> Result { - let index_uid = IndexUid::try_from(index_uid.into_inner())?; - - let new_settings = Settings { $attr: Setting::Reset.into(), ..Default::default() }; - - let allow_index_creation = - index_scheduler.filters().allow_index_creation(&index_uid); - - let task = KindWithContent::SettingsUpdate { - index_uid: index_uid.to_string(), - new_settings: Box::new(new_settings), - is_deletion: true, - allow_index_creation, - }; - let uid = get_task_id(&req, &opt)?; - let dry_run = is_dry_run(&req, &opt)?; - let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) - .await?? 
- .into(); - - debug!(returns = ?task, "Delete settings"); - Ok(HttpResponse::Accepted().json(task)) - } - - pub async fn update( - index_scheduler: GuardedData< - ActionPolicy<{ actions::SETTINGS_UPDATE }>, - Data<IndexScheduler>, - >, - index_uid: actix_web::web::Path<String>, - body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>, - req: HttpRequest, - opt: web::Data<Opt>, - $analytics_var: web::Data<Analytics>, - ) -> std::result::Result<HttpResponse, ResponseError> { - let index_uid = IndexUid::try_from(index_uid.into_inner())?; - - let body = body.into_inner(); - debug!(parameters = ?body, "Update settings"); - - #[allow(clippy::redundant_closure_call)] - $analytics(&body, &req); - - let new_settings = Settings { - $attr: match body { - Some(inner_body) => Setting::Set(inner_body).into(), - None => Setting::Reset.into(), - }, - ..Default::default() - }; - - let new_settings = $crate::routes::indexes::settings::validate_settings( - new_settings, - &index_scheduler, - )?; - - let allow_index_creation = - index_scheduler.filters().allow_index_creation(&index_uid); - - let task = KindWithContent::SettingsUpdate { - index_uid: index_uid.to_string(), - new_settings: Box::new(new_settings), - is_deletion: false, - allow_index_creation, - }; - let uid = get_task_id(&req, &opt)?; - let dry_run = is_dry_run(&req, &opt)?; - let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) - .await?? - .into(); - - debug!(returns = ?task, "Update settings"); - Ok(HttpResponse::Accepted().json(task)) - } - - pub async fn get( - index_scheduler: GuardedData< - ActionPolicy<{ actions::SETTINGS_GET }>, - Data<IndexScheduler>, - >, - index_uid: actix_web::web::Path<String>, - ) -> std::result::Result<HttpResponse, ResponseError> { - let index_uid = IndexUid::try_from(index_uid.into_inner())?; - - let index = index_scheduler.index(&index_uid)?; - let rtxn = index.read_txn()?; - let settings = settings(&index, &rtxn, meilisearch_types::settings::SecretPolicy::HideSecrets)?; - - debug!(returns = ?settings, "Get settings"); - - Ok(HttpResponse::Ok().json(settings.$attr)) - } - - pub fn resources() -> Resource { - Resource::new($route) - .route(web::get().to(SeqHandler(get))) - .route(web::$update_verb().to(SeqHandler(update))) - .route(web::delete().to(SeqHandler(delete))) - } - } - }; -} - -make_setting_route!( - "/filterable-attributes", - put, - std::collections::BTreeSet<String>, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes, - >, - filterable_attributes, - "filterableAttributes", - analytics, - |setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "FilterableAttributes Updated".to_string(), - json!({ - "filterable_attributes": { - "total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0), - "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), - } - }), - Some(req), - ); - } -); - -make_setting_route!( - "/sortable-attributes", - put, - std::collections::BTreeSet<String>, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes, - >, - sortable_attributes, - "sortableAttributes", - analytics, - |setting: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "SortableAttributes Updated".to_string(), - json!({ - "sortable_attributes": { - "total": setting.as_ref().map(|sort| sort.len()), - "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/displayed-attributes", - put, - Vec<String>, -
meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes, - >, - displayed_attributes, - "displayedAttributes", - analytics, - |displayed: &Option>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "DisplayedAttributes Updated".to_string(), - json!({ - "displayed_attributes": { - "total": displayed.as_ref().map(|displayed| displayed.len()), - "with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/typo-tolerance", - patch, - meilisearch_types::settings::TypoSettings, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance, - >, - typo_tolerance, - "typoTolerance", - analytics, - |setting: &Option, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "TypoTolerance Updated".to_string(), - json!({ - "typo_tolerance": { - "enabled": setting.as_ref().map(|s| !matches!(s.enabled, Setting::Set(false))), - "disable_on_attributes": setting - .as_ref() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - "disable_on_words": setting - .as_ref() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - "min_word_size_for_one_typo": setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.one_typo.set())) - .flatten(), - "min_word_size_for_two_typos": setting - .as_ref() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.two_typos.set())) - .flatten(), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/searchable-attributes", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes, - >, - searchable_attributes, - "searchableAttributes", - analytics, - |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "SearchableAttributes Updated".to_string(), - json!({ - "searchable_attributes": { - "total": setting.as_ref().map(|searchable| searchable.len()), - "with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/stop-words", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsStopWords, - >, - stop_words, - "stopWords", - analytics, - |stop_words: &Option>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "StopWords Updated".to_string(), - json!({ - "stop_words": { - "total": stop_words.as_ref().map(|stop_words| stop_words.len()), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/non-separator-tokens", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens, - >, - non_separator_tokens, - "nonSeparatorTokens", - analytics, - |non_separator_tokens: &Option>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "nonSeparatorTokens Updated".to_string(), - json!({ - "non_separator_tokens": { - "total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/separator-tokens", - put, - std::collections::BTreeSet, - 
meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens, - >, - separator_tokens, - "separatorTokens", - analytics, - |separator_tokens: &Option>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "separatorTokens Updated".to_string(), - json!({ - "separator_tokens": { - "total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/dictionary", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsDictionary, - >, - dictionary, - "dictionary", - analytics, - |dictionary: &Option>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "dictionary Updated".to_string(), - json!({ - "dictionary": { - "total": dictionary.as_ref().map(|dictionary| dictionary.len()), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/synonyms", - put, - std::collections::BTreeMap>, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms, - >, - synonyms, - "synonyms", - analytics, - |synonyms: &Option>>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "Synonyms Updated".to_string(), - json!({ - "synonyms": { - "total": synonyms.as_ref().map(|synonyms| synonyms.len()), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/distinct-attribute", - put, - String, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute, - >, - distinct_attribute, - "distinctAttribute", - analytics, - |distinct: &Option, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "DistinctAttribute Updated".to_string(), - json!({ - "distinct_attribute": { - "set": distinct.is_some(), - } - }), - Some(req), - ); - } -); - -make_setting_route!( - "/proximity-precision", - put, - meilisearch_types::settings::ProximityPrecisionView, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision, - >, - proximity_precision, - "proximityPrecision", - analytics, - |precision: &Option, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "ProximityPrecision Updated".to_string(), - json!({ - "proximity_precision": { - "set": precision.is_some(), - "value": precision.unwrap_or_default(), - } - }), - Some(req), - ); - } -); - -make_setting_route!( - "/localized-attributes", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes, - >, - localized_attributes, - "localizedAttributes", - analytics, - |rules: &Option>, req: &HttpRequest| { - use serde_json::json; - analytics.publish( - "LocalizedAttributesRules Updated".to_string(), - json!({ - "locales": rules.as_ref().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::>()) - }), - Some(req), - ); - } -); - -make_setting_route!( - "/ranking-rules", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules, - >, - ranking_rules, - "rankingRules", - analytics, - |setting: &Option>, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "RankingRules Updated".to_string(), - json!({ - "ranking_rules": { - "words_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, 
meilisearch_types::settings::RankingRuleView::Words))), - "typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Typo))), - "proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Proximity))), - "attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Attribute))), - "sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Sort))), - "exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Exactness))), - "values": setting.as_ref().map(|rr| rr.iter().filter(|s| matches!(s, meilisearch_types::settings::RankingRuleView::Asc(_) | meilisearch_types::settings::RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::>().join(", ")), - } - }), - Some(req), - ); - } -); - -make_setting_route!( - "/faceting", - patch, - meilisearch_types::settings::FacetingSettings, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsFaceting, - >, - faceting, - "faceting", - analytics, - |setting: &Option, req: &HttpRequest| { - use serde_json::json; - use meilisearch_types::facet_values_sort::FacetValuesSort; - - analytics.publish( - "Faceting Updated".to_string(), - json!({ - "faceting": { - "max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()), - "sort_facet_values_by_star_count": setting.as_ref().and_then(|s| { - s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - "sort_facet_values_by_total": setting.as_ref().and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/pagination", - patch, - meilisearch_types::settings::PaginationSettings, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsPagination, - >, - pagination, - "pagination", - analytics, - |setting: &Option, req: &HttpRequest| { - use serde_json::json; - - analytics.publish( - "Pagination Updated".to_string(), - json!({ - "pagination": { - "max_total_hits": setting.as_ref().and_then(|s| s.max_total_hits.set()), - }, - }), - Some(req), - ); - } -); - -make_setting_route!( - "/embedders", - patch, - std::collections::BTreeMap>, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders, - >, - embedders, - "embedders", - analytics, - |setting: &Option>>, req: &HttpRequest| { - - - analytics.publish( - "Embedders Updated".to_string(), - serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}), - Some(req), - ); - } -); - -fn embedder_analytics( - setting: Option< - &std::collections::BTreeMap< - String, - Setting, - >, - >, -) -> serde_json::Value { - let mut sources = std::collections::HashSet::new(); - - if let Some(s) = &setting { - for source in s - .values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.source.set()) - { - use meilisearch_types::milli::vector::settings::EmbedderSource; - match source { - EmbedderSource::OpenAi => sources.insert("openAi"), - EmbedderSource::HuggingFace => sources.insert("huggingFace"), - EmbedderSource::UserProvided => sources.insert("userProvided"), - 
EmbedderSource::Ollama => sources.insert("ollama"), - EmbedderSource::Rest => sources.insert("rest"), - }; - } - }; - - let document_template_used = setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.document_template.set().is_some()) - }); - - let document_template_max_bytes = setting.as_ref().and_then(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .filter_map(|config| config.document_template_max_bytes.set()) - .max() - }); - - let binary_quantization_used = setting.as_ref().map(|map| { - map.values() - .filter_map(|config| config.clone().set()) - .any(|config| config.binary_quantized.set().is_some()) - }); - - json!( - { - "total": setting.as_ref().map(|s| s.len()), - "sources": sources, - "document_template_used": document_template_used, - "document_template_max_bytes": document_template_max_bytes, - "binary_quantization_used": binary_quantization_used, - } - ) -} - -make_setting_route!( - "/search-cutoff-ms", - put, - u64, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs, - >, - search_cutoff_ms, - "searchCutoffMs", - analytics, - |setting: &Option, req: &HttpRequest| { - analytics.publish( - "Search Cutoff Updated".to_string(), - serde_json::json!({"search_cutoff_ms": setting }), - Some(req), - ); - } -); - -macro_rules! generate_configure { - ($($mod:ident),*) => { - pub fn configure(cfg: &mut web::ServiceConfig) { - use crate::extractors::sequential_extractor::SeqHandler; - cfg.service( - web::resource("") - .route(web::patch().to(SeqHandler(update_all))) - .route(web::get().to(SeqHandler(get_all))) - .route(web::delete().to(SeqHandler(delete_all)))) - $(.service($mod::resources()))*; - } - }; -} - -generate_configure!( - filterable_attributes, - sortable_attributes, - displayed_attributes, - localized_attributes, - searchable_attributes, - distinct_attribute, - proximity_precision, - stop_words, - separator_tokens, - non_separator_tokens, - dictionary, - synonyms, - ranking_rules, - typo_tolerance, - pagination, - faceting, - embedders, - search_cutoff_ms -); - -pub async fn update_all( - index_scheduler: GuardedData, Data>, - index_uid: web::Path, - body: AwebJson, DeserrJsonError>, - req: HttpRequest, - opt: web::Data, - analytics: web::Data, -) -> Result { - let index_uid = IndexUid::try_from(index_uid.into_inner())?; - - let new_settings = body.into_inner(); - debug!(parameters = ?new_settings, "Update all settings"); - let new_settings = validate_settings(new_settings, &index_scheduler)?; - - analytics.publish( - "Settings Updated".to_string(), - json!({ - "ranking_rules": { - "words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Words))), - "typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Typo))), - "proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Proximity))), - "attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Attribute))), - "sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Sort))), - "exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| matches!(s, RankingRuleView::Exactness))), - "values": 
new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !matches!(s, RankingRuleView::Asc(_) | RankingRuleView::Desc(_)) ).map(|x| x.to_string()).collect::>().join(", ")), - }, - "searchable_attributes": { - "total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), - "with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")), - }, - "displayed_attributes": { - "total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()), - "with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")), - }, - "sortable_attributes": { - "total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), - "has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), - }, - "filterable_attributes": { - "total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), - "has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), - }, - "distinct_attribute": { - "set": new_settings.distinct_attribute.as_ref().set().is_some() - }, - "proximity_precision": { - "set": new_settings.proximity_precision.as_ref().set().is_some(), - "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default() - }, - "typo_tolerance": { - "enabled": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.enabled.as_ref().set()) - .copied(), - "disable_on_attributes": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - "disable_on_words": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - "min_word_size_for_one_typo": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.one_typo.set())) - .flatten(), - "min_word_size_for_two_typos": new_settings.typo_tolerance - .as_ref() - .set() - .and_then(|s| s.min_word_size_for_typos - .as_ref() - .set() - .map(|s| s.two_typos.set())) - .flatten(), - }, - "faceting": { - "max_values_per_facet": new_settings.faceting - .as_ref() - .set() - .and_then(|s| s.max_values_per_facet.as_ref().set()), - "sort_facet_values_by_star_count": new_settings.faceting - .as_ref() - .set() - .and_then(|s| { - s.sort_facet_values_by.as_ref().set().map(|s| s.iter().any(|(k, v)| k == "*" && v == &FacetValuesSort::Count)) - }), - "sort_facet_values_by_total": new_settings.faceting - .as_ref() - .set() - .and_then(|s| s.sort_facet_values_by.as_ref().set().map(|s| s.len())), - }, - "pagination": { - "max_total_hits": new_settings.pagination - .as_ref() - .set() - .and_then(|s| s.max_total_hits.as_ref().set()), - }, - "stop_words": { - "total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()), - }, - "synonyms": { - "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), - }, - "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()), - "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(), - "locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::>()), - }), - Some(&req), - ); - - let allow_index_creation = 
index_scheduler.filters().allow_index_creation(&index_uid); - let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); - let task = KindWithContent::SettingsUpdate { - index_uid, - new_settings: Box::new(new_settings), - is_deletion: false, - allow_index_creation, - }; - let uid = get_task_id(&req, &opt)?; - let dry_run = is_dry_run(&req, &opt)?; - let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) - .await?? - .into(); - - debug!(returns = ?task, "Update all settings"); - Ok(HttpResponse::Accepted().json(task)) -} - -pub async fn get_all( - index_scheduler: GuardedData, Data>, - index_uid: web::Path, -) -> Result { - let index_uid = IndexUid::try_from(index_uid.into_inner())?; - - let index = index_scheduler.index(&index_uid)?; - let rtxn = index.read_txn()?; - let new_settings = settings(&index, &rtxn, SecretPolicy::HideSecrets)?; - debug!(returns = ?new_settings, "Get all settings"); - Ok(HttpResponse::Ok().json(new_settings)) -} - -pub async fn delete_all( - index_scheduler: GuardedData, Data>, - index_uid: web::Path, - req: HttpRequest, - opt: web::Data, -) -> Result { - let index_uid = IndexUid::try_from(index_uid.into_inner())?; - - let new_settings = Settings::cleared().into_unchecked(); - - let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid); - let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); - let task = KindWithContent::SettingsUpdate { - index_uid, - new_settings: Box::new(new_settings), - is_deletion: true, - allow_index_creation, - }; - let uid = get_task_id(&req, &opt)?; - let dry_run = is_dry_run(&req, &opt)?; - let task: SummarizedTaskView = - tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run)) - .await?? - .into(); - - debug!(returns = ?task, "Delete all settings"); - Ok(HttpResponse::Accepted().json(task)) -} - -fn validate_settings( - settings: Settings, - index_scheduler: &IndexScheduler, -) -> Result, ResponseError> { - if matches!(settings.embedders, Setting::Set(_)) { - index_scheduler.features().check_vector("Passing `embedders` in settings")? - } - Ok(settings.validate()?) -} diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs deleted file mode 100644 index 0b82c394a..000000000 --- a/meilitool/src/main.rs +++ /dev/null @@ -1,752 +0,0 @@ -use std::fs::{read_dir, read_to_string, remove_file, File}; -use std::io::BufWriter; -use std::path::PathBuf; - -use anyhow::{bail, Context}; -use clap::{Parser, Subcommand}; -use dump::{DumpWriter, IndexMetadata}; -use file_store::FileStore; -use meilisearch_auth::AuthController; -use meilisearch_types::heed::types::{SerdeJson, Str}; -use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; -use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; -use meilisearch_types::milli::index::{db_name, main_key}; -use meilisearch_types::milli::{obkv_to_json, BEU32}; -use meilisearch_types::tasks::{Status, Task}; -use meilisearch_types::versioning::{create_version_file, get_version, parse_version}; -use meilisearch_types::Index; -use time::macros::format_description; -use time::OffsetDateTime; -use uuid_codec::UuidCodec; - -mod uuid_codec; - -#[derive(Parser)] -#[command(author, version, about, long_about = None)] -struct Cli { - /// The database path where the Meilisearch is running. 
- #[arg(long, default_value = "data.ms/")] - db_path: PathBuf, - - #[command(subcommand)] - command: Command, -} - -#[derive(Subcommand)] -enum Command { - /// Clears the task queue, leaving it empty. - /// - /// This command can be safely executed even if Meilisearch is running and processing tasks. - /// Once the task queue is empty you can restart Meilisearch and no tasks should be visible anymore, - /// not even the ones that were processing. However, it is quite likely that you will see the processing - /// tasks in the queue again with an associated internal error message. - ClearTaskQueue, - - /// Exports a dump from the Meilisearch database. - /// - /// Make sure to run this command when Meilisearch is not running, or running but not processing tasks. - /// If tasks are being processed while a dump is being exported, the dump may end up - /// malformed, with missing tasks. - /// - /// TODO Verify this claim or make sure it cannot happen and we can export dumps - /// without caring about killing Meilisearch first! - ExportADump { - /// The directory in which the dump will be created. - #[arg(long, default_value = "dumps/")] - dump_dir: PathBuf, - - /// Skip dumping the enqueued or processing tasks. - /// - /// Can be useful when there are a lot of them and keeping them is not particularly - /// useful. Note that only the enqueued tasks take up space, so skipping - /// the processed ones is not particularly interesting. - #[arg(long)] - skip_enqueued_tasks: bool, - }, - - /// Attempts to upgrade from one major version to the next without a dump. - /// - /// Make sure to run this command when Meilisearch is not running! - /// If Meilisearch is running while executing this command, the database could be corrupted - /// (it could contain data from both the old and the new versions). - /// - /// Supported upgrade paths: - /// - /// - v1.9.0 -> v1.10.0 - OfflineUpgrade { - #[arg(long)] - target_version: String, - }, -} - -fn main() -> anyhow::Result<()> { - let Cli { db_path, command } = Cli::parse(); - - let detected_version = get_version(&db_path).context("While checking the version file")?; - - match command { - Command::ClearTaskQueue => clear_task_queue(db_path), - Command::ExportADump { dump_dir, skip_enqueued_tasks } => { - export_a_dump(db_path, dump_dir, skip_enqueued_tasks) - } - Command::OfflineUpgrade { target_version } => { - let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; - OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() - } - } -}
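// Illustrative sketch: upgrade() below gates the migration by matching on
// (major, minor, patch) string tuples, so exactly one source and one target version
// pair is accepted. A minimal standalone version of that dispatch; the version
// literals mirror the v1.9 -> v1.10 path:
fn check_upgrade_path(
    current: (&str, &str, &str),
    target: (&str, &str, &str),
) -> Result<(), String> {
    match current {
        ("1", "9", _) => {}
        (major, minor, patch) => {
            return Err(format!("unsupported current version {major}.{minor}.{patch}"))
        }
    }
    match target {
        ("1", "10", _) => Ok(()),
        (major, minor, patch) => {
            Err(format!("unsupported target version {major}.{minor}.{patch}"))
        }
    }
}

fn main() {
    assert!(check_upgrade_path(("1", "9", "5"), ("1", "10", "0")).is_ok());
    assert!(check_upgrade_path(("1", "8", "0"), ("1", "10", "0")).is_err());
}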
- -struct OfflineUpgrade { - db_path: PathBuf, - current_version: (String, String, String), - target_version: (String, String, String), -} - -impl OfflineUpgrade { - fn upgrade(self) -> anyhow::Result<()> { - // TODO: if we make this process support more versions, introduce a more flexible way of checking for the version - // currently only supports v1.9 to v1.10 - let (current_major, current_minor, current_patch) = &self.current_version; - - match (current_major.as_str(), current_minor.as_str(), current_patch.as_str()) { - ("1", "9", _) => {} - _ => { - bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9") - } - } - - let (target_major, target_minor, target_patch) = &self.target_version; - - match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { - ("1", "10", _) => {} - _ => { - bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10") - } - } - - println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); - - self.v1_9_to_v1_10()?; - - println!("Writing VERSION file"); - - create_version_file(&self.db_path, target_major, target_minor, target_patch) - .context("while writing VERSION file after the upgrade")?; - - println!("Success"); - - Ok(()) - } - - fn v1_9_to_v1_10(&self) -> anyhow::Result<()> { - // 2 changes here: - // 1. the date format, which needs to be done before opening the Index - // 2. REST embedders. We don't support this case right now, so bail - - let index_scheduler_path = self.db_path.join("tasks"); - let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } - .with_context(|| { - format!("While trying to open {:?}", index_scheduler_path.display()) - })?; - - let mut sched_wtxn = env.write_txn()?; - - let index_mapping: Database<Str, UuidCodec> = - try_opening_database(&env, &sched_wtxn, "index-mapping")?; - - let index_stats: Database<UuidCodec, Unspecified> = - try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| { - format!("While trying to open {:?}", index_scheduler_path.display()) - })?; - - let index_count = - index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?; - - // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn - // 1. immutably for the iteration - // 2. mutably for updating index stats - let indexes: Vec<_> = index_mapping - .iter(&sched_wtxn)? - .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) - .collect(); - - let mut rest_embedders = Vec::new(); - - let mut unwrapped_indexes = Vec::new(); - - // check that the update can take place - for (index_index, result) in indexes.into_iter().enumerate() { - let (uid, uuid) = result?; - let index_path = self.db_path.join("indexes").join(uuid.to_string()); - - println!( - "[{}/{index_count}] Checking that the update can take place for `{uid}` at `{}`", - index_index + 1, - index_path.display() - ); - - let index_env = unsafe { - // FIXME: fetch the 25 magic number from the index file - EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { - format!("while opening index {uid} at '{}'", index_path.display()) - })? - }; - - let index_txn = index_env.read_txn().with_context(|| { - format!( - "while obtaining a read transaction for index {uid} at {}", - index_path.display() - ) - })?; - - println!("\t- Checking for incompatible embedders (REST embedders)"); - let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?; - - if rest_embedders_for_index.is_empty() { - unwrapped_indexes.push((uid, uuid)); - } else { - // no need to add to unwrapped indexes because we'll exit early - rest_embedders.push((uid, rest_embedders_for_index)); - } - } - - if !rest_embedders.is_empty() { - let rest_embedders = rest_embedders - .into_iter() - .flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders)) - .map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`")) - .collect::<Vec<_>>() - .join("\n"); - bail!("The update cannot take place because there are REST embedder(s).
Remove them before proceeding with the update:\n{rest_embedders}\n\n\ - The database has not been modified and is still a valid v1.9 database."); - } - - println!("Update can take place, updating"); - - for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() { - let index_path = self.db_path.join("indexes").join(uuid.to_string()); - - println!( - "[{}/{index_count}]Updating index `{uid}` at `{}`", - index_index + 1, - index_path.display() - ); - - let index_env = unsafe { - // FIXME: fetch the 25 magic number from the index file - EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { - format!("while opening index {uid} at '{}'", index_path.display()) - })? - }; - - let mut index_wtxn = index_env.write_txn().with_context(|| { - format!( - "while obtaining a write transaction for index `{uid}` at `{}`", - index_path.display() - ) - })?; - - println!("\t- Updating index stats"); - update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?; - println!("\t- Updating date format"); - update_date_format(&uid, &index_env, &mut index_wtxn)?; - - index_wtxn.commit().with_context(|| { - format!( - "while committing the write txn for index `{uid}` at {}", - index_path.display() - ) - })?; - } - - sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?; - - println!("Upgrading database succeeded"); - - Ok(()) - } -} - -pub mod v1_9 { - pub type FieldDistribution = std::collections::BTreeMap; - - /// The statistics that can be computed from an `Index` object. - #[derive(serde::Serialize, serde::Deserialize, Debug)] - pub struct IndexStats { - /// Number of documents in the index. - pub number_of_documents: u64, - /// Size taken up by the index' DB, in bytes. - /// - /// This includes the size taken by both the used and free pages of the DB, and as the free pages - /// are not returned to the disk after a deletion, this number is typically larger than - /// `used_database_size` that only includes the size of the used pages. - pub database_size: u64, - /// Size taken by the used pages of the index' DB, in bytes. - /// - /// As the DB backend does not return to the disk the pages that are not currently used by the DB, - /// this value is typically smaller than `database_size`. - pub used_database_size: u64, - /// Association of every field name with the number of times it occurs in the documents. - pub field_distribution: FieldDistribution, - /// Creation date of the index. - pub created_at: time::OffsetDateTime, - /// Date of the last update of the index. - pub updated_at: time::OffsetDateTime, - } - - use serde::{Deserialize, Serialize}; - - #[derive(Debug, Deserialize, Serialize)] - pub struct IndexEmbeddingConfig { - pub name: String, - pub config: EmbeddingConfig, - } - - #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] - pub struct EmbeddingConfig { - /// Options of the embedder, specific to each kind of embedder - pub embedder_options: EmbedderOptions, - } - - /// Options of an embedder, specific to each kind of embedder. 
-    #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
-    pub enum EmbedderOptions {
-        HuggingFace(hf::EmbedderOptions),
-        OpenAi(openai::EmbedderOptions),
-        Ollama(ollama::EmbedderOptions),
-        UserProvided(manual::EmbedderOptions),
-        Rest(rest::EmbedderOptions),
-    }
-
-    impl Default for EmbedderOptions {
-        fn default() -> Self {
-            Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None })
-        }
-    }
-
-    mod hf {
-        #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
-        pub struct EmbedderOptions {
-            pub model: String,
-            pub revision: Option<String>,
-        }
-    }
-    mod openai {
-
-        #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
-        pub struct EmbedderOptions {
-            pub api_key: Option<String>,
-            pub dimensions: Option<usize>,
-        }
-    }
-    mod ollama {
-        #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
-        pub struct EmbedderOptions {
-            pub embedding_model: String,
-            pub url: Option<String>,
-            pub api_key: Option<String>,
-        }
-    }
-    mod manual {
-        #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
-        pub struct EmbedderOptions {
-            pub dimensions: usize,
-        }
-    }
-    mod rest {
-        #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)]
-        pub struct EmbedderOptions {
-            pub api_key: Option<String>,
-            pub dimensions: Option<usize>,
-            pub url: String,
-            pub input_field: Vec<String>,
-            // path to the array of embeddings
-            pub path_to_embeddings: Vec<String>,
-            // shape of a single embedding
-            pub embedding_object: Vec<String>,
-        }
-    }
-
-    pub type OffsetDateTime = time::OffsetDateTime;
-}
-
-pub mod v1_10 {
-    use crate::v1_9;
-
-    pub type FieldDistribution = std::collections::BTreeMap<String, u64>;
-
-    /// The statistics that can be computed from an `Index` object.
-    #[derive(serde::Serialize, serde::Deserialize, Debug)]
-    pub struct IndexStats {
-        /// Number of documents in the index.
-        pub number_of_documents: u64,
-        /// Size taken up by the index' DB, in bytes.
-        ///
-        /// This includes the size taken by both the used and free pages of the DB, and as the free pages
-        /// are not returned to the disk after a deletion, this number is typically larger than
-        /// `used_database_size` that only includes the size of the used pages.
-        pub database_size: u64,
-        /// Size taken by the used pages of the index' DB, in bytes.
-        ///
-        /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
-        /// this value is typically smaller than `database_size`.
-        pub used_database_size: u64,
-        /// Association of every field name with the number of times it occurs in the documents.
-        pub field_distribution: FieldDistribution,
-        /// Creation date of the index.
-        #[serde(with = "time::serde::rfc3339")]
-        pub created_at: time::OffsetDateTime,
-        /// Date of the last update of the index.
- #[serde(with = "time::serde::rfc3339")] - pub updated_at: time::OffsetDateTime, - } - - impl From for IndexStats { - fn from( - v1_9::IndexStats { - number_of_documents, - database_size, - used_database_size, - field_distribution, - created_at, - updated_at, - }: v1_9::IndexStats, - ) -> Self { - IndexStats { - number_of_documents, - database_size, - used_database_size, - field_distribution, - created_at, - updated_at, - } - } - } - - #[derive(serde::Serialize, serde::Deserialize)] - #[serde(transparent)] - pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime); -} - -fn update_index_stats( - index_stats: Database, - index_uid: &str, - index_uuid: uuid::Uuid, - sched_wtxn: &mut RwTxn, -) -> anyhow::Result<()> { - let ctx = || format!("while updating index stats for index `{index_uid}`"); - - let stats: Option = index_stats - .remap_data_type::>() - .get(sched_wtxn, &index_uuid) - .with_context(ctx)?; - - if let Some(stats) = stats { - let stats: v1_10::IndexStats = stats.into(); - - index_stats - .remap_data_type::>() - .put(sched_wtxn, &index_uuid, &stats) - .with_context(ctx)?; - } - - Ok(()) -} - -fn update_date_format( - index_uid: &str, - index_env: &Env, - index_wtxn: &mut RwTxn, -) -> anyhow::Result<()> { - let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) - .with_context(|| format!("while updating date format for index `{index_uid}`"))?; - - date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?; - date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?; - - Ok(()) -} - -fn find_rest_embedders( - index_uid: &str, - index_env: &Env, - index_txn: &RoTxn, -) -> anyhow::Result> { - let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) - .with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?; - - let mut rest_embedders = vec![]; - - for config in main - .remap_types::>>() - .get(index_txn, main_key::EMBEDDING_CONFIGS)? - .unwrap_or_default() - { - if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options { - rest_embedders.push(config.name); - } - } - - Ok(rest_embedders) -} - -fn date_round_trip( - wtxn: &mut RwTxn, - index_uid: &str, - db: Database, - key: &str, -) -> anyhow::Result<()> { - let datetime = - db.remap_types::>().get(wtxn, key).with_context( - || format!("could not read `{key}` while updating date format for index `{index_uid}`"), - )?; - - if let Some(datetime) = datetime { - db.remap_types::>() - .put(wtxn, key, &v1_10::OffsetDateTime(datetime)) - .with_context(|| { - format!( - "could not write `{key}` while updating date format for index `{index_uid}`" - ) - })?; - } - - Ok(()) -} - -/// Clears the task queue located at `db_path`. 
-fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { - let path = db_path.join("tasks"); - let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) } - .with_context(|| format!("While trying to open {:?}", path.display()))?; - - eprintln!("Deleting tasks from the database..."); - - let mut wtxn = env.write_txn()?; - let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?; - let total = all_tasks.len(&wtxn)?; - let status = try_opening_poly_database(&env, &wtxn, "status")?; - let kind = try_opening_poly_database(&env, &wtxn, "kind")?; - let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?; - let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?; - let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?; - let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?; - let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?; - - try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?; - try_clearing_poly_database(&mut wtxn, status, "status")?; - try_clearing_poly_database(&mut wtxn, kind, "kind")?; - try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?; - try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?; - try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?; - try_clearing_poly_database(&mut wtxn, started_at, "started-at")?; - try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?; - - wtxn.commit().context("While committing the transaction")?; - - eprintln!("Successfully deleted {total} tasks from the tasks database!"); - eprintln!("Deleting the content files from disk..."); - - let mut count = 0usize; - let update_files = db_path.join("update_files"); - let entries = read_dir(&update_files).with_context(|| { - format!("While trying to read the content of {:?}", update_files.display()) - })?; - for result in entries { - match result { - Ok(ent) => match remove_file(ent.path()) { - Ok(_) => count += 1, - Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e), - }, - Err(e) => { - eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e) - } - } - } - - eprintln!("Successfully deleted {count} content files from disk!"); - - Ok(()) -} - -fn try_opening_database( - env: &Env, - rtxn: &RoTxn, - db_name: &str, -) -> anyhow::Result> { - env.open_database(rtxn, Some(db_name)) - .with_context(|| format!("While opening the {db_name:?} database"))? - .with_context(|| format!("Missing the {db_name:?} database")) -} - -fn try_opening_poly_database( - env: &Env, - rtxn: &RoTxn, - db_name: &str, -) -> anyhow::Result> { - env.database_options() - .name(db_name) - .open(rtxn) - .with_context(|| format!("While opening the {db_name:?} poly database"))? - .with_context(|| format!("Missing the {db_name:?} poly database")) -} - -fn try_clearing_poly_database( - wtxn: &mut RwTxn, - database: Database, - db_name: &str, -) -> anyhow::Result<()> { - database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database")) -} - -/// Exports a dump into the dump directory. -fn export_a_dump( - db_path: PathBuf, - dump_dir: PathBuf, - skip_enqueued_tasks: bool, -) -> Result<(), anyhow::Error> { - let started_at = OffsetDateTime::now_utc(); - - // 1. 
Extracts the instance UID from disk - let instance_uid_path = db_path.join("instance-uid"); - let instance_uid = match read_to_string(&instance_uid_path) { - Ok(content) => match content.trim().parse() { - Ok(uuid) => Some(uuid), - Err(e) => { - eprintln!("Impossible to parse instance-uid: {e}"); - None - } - }, - Err(e) => { - eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e); - None - } - }; - - let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?; - let file_store = - FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; - - let index_scheduler_path = db_path.join("tasks"); - let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } - .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; - - eprintln!("Dumping the keys..."); - - // 2. dump the keys - let auth_store = AuthController::new(&db_path, &None) - .with_context(|| format!("While opening the auth store at {}", db_path.display()))?; - let mut dump_keys = dump.create_keys()?; - let mut count = 0; - for key in auth_store.list_keys()? { - dump_keys.push_key(&key)?; - count += 1; - } - dump_keys.flush()?; - - eprintln!("Successfully dumped {count} keys!"); - - let rtxn = env.read_txn()?; - let all_tasks: Database> = - try_opening_database(&env, &rtxn, "all-tasks")?; - let index_mapping: Database = - try_opening_database(&env, &rtxn, "index-mapping")?; - - if skip_enqueued_tasks { - eprintln!("Skip dumping the enqueued tasks..."); - } else { - eprintln!("Dumping the enqueued tasks..."); - - // 3. dump the tasks - let mut dump_tasks = dump.create_tasks_queue()?; - let mut count = 0; - for ret in all_tasks.iter(&rtxn)? { - let (_, t) = ret?; - let status = t.status; - let content_file = t.content_uuid(); - let mut dump_content_file = dump_tasks.push_task(&t.into())?; - - // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. - if let Some(content_file_uuid) = content_file { - if status == Status::Enqueued { - let content_file = file_store.get_update(content_file_uuid)?; - - let reader = - DocumentsBatchReader::from_reader(content_file).with_context(|| { - format!("While reading content file {:?}", content_file_uuid) - })?; - - let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); - while let Some(doc) = cursor.next_document().with_context(|| { - format!("While iterating on content file {:?}", content_file_uuid) - })? { - dump_content_file - .push_document(&obkv_to_object(doc, &documents_batch_index)?)?; - } - dump_content_file.flush()?; - count += 1; - } - } - } - dump_tasks.flush()?; - - eprintln!("Successfully dumped {count} enqueued tasks!"); - } - - eprintln!("Dumping the indexes..."); - - // 4. Dump the indexes - let mut count = 0; - for result in index_mapping.iter(&rtxn)? 
{
-        let (uid, uuid) = result?;
-        let index_path = db_path.join("indexes").join(uuid.to_string());
-        let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
-            format!("While trying to open the index at path {:?}", index_path.display())
-        })?;
-
-        let rtxn = index.read_txn()?;
-        let metadata = IndexMetadata {
-            uid: uid.to_owned(),
-            primary_key: index.primary_key(&rtxn)?.map(String::from),
-            created_at: index.created_at(&rtxn)?,
-            updated_at: index.updated_at(&rtxn)?,
-        };
-        let mut index_dumper = dump.create_index(uid, &metadata)?;
-
-        let fields_ids_map = index.fields_ids_map(&rtxn)?;
-        let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
-
-        // 4.1. Dump the documents
-        for ret in index.all_documents(&rtxn)? {
-            let (_id, doc) = ret?;
-            let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
-            index_dumper.push_document(&document)?;
-        }
-
-        // 4.2. Dump the settings
-        let settings = meilisearch_types::settings::settings(
-            &index,
-            &rtxn,
-            meilisearch_types::settings::SecretPolicy::RevealSecrets,
-        )?;
-        index_dumper.settings(&settings)?;
-        count += 1;
-    }
-
-    eprintln!("Successfully dumped {count} indexes!");
-    // We will not dump experimental feature settings
-    eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
-
-    let dump_uid = started_at.format(format_description!(
-        "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
-    )).unwrap();
-
-    let path = dump_dir.join(format!("{}.dump", dump_uid));
-    let file = File::create(&path)?;
-    dump.persist_to(BufWriter::new(file))?;
-
-    eprintln!("Dump exported at path {:?}", path.display());
-
-    Ok(())
-}
diff --git a/milli/src/update/new/extract/cache.rs b/milli/src/update/new/extract/cache.rs
deleted file mode 100644
index cbb42af8b..000000000
--- a/milli/src/update/new/extract/cache.rs
+++ /dev/null
@@ -1,299 +0,0 @@
-use std::fmt::Write as _;
-use std::mem;
-use std::num::NonZeroUsize;
-
-use grenad::{MergeFunction, Sorter};
-use roaring::bitmap::Statistics;
-use roaring::RoaringBitmap;
-use smallvec::SmallVec;
-
-use super::lru::Lru;
-use crate::update::del_add::{DelAdd, KvWriterDelAdd};
-use crate::CboRoaringBitmapCodec;
-
-const KEY_SIZE: usize = 12;
-
-#[derive(Debug)]
-pub struct CboCachedSorter<MF> {
-    cache: Lru<SmallVec<[u8; KEY_SIZE]>, DelAddRoaringBitmap>,
-    sorter: Sorter<MF>,
-    deladd_buffer: Vec<u8>,
-    cbo_buffer: Vec<u8>,
-    total_insertions: usize,
-    fitted_in_key: usize,
-}
-
-impl<MF> CboCachedSorter<MF> {
-    pub fn new(cap: NonZeroUsize, sorter: Sorter<MF>) -> Self {
-        CboCachedSorter {
-            cache: Lru::new(cap),
-            sorter,
-            deladd_buffer: Vec::new(),
-            cbo_buffer: Vec::new(),
-            total_insertions: 0,
-            fitted_in_key: 0,
-        }
-    }
-}
-
-impl<MF: MergeFunction> CboCachedSorter<MF> {
-    pub fn insert_del_u32(&mut self, key: &[u8], n: u32) -> grenad::Result<(), MF::Error> {
-        match self.cache.get_mut(key) {
-            Some(DelAddRoaringBitmap { del, add: _ }) => {
-                del.get_or_insert_with(RoaringBitmap::default).insert(n);
-            }
-            None => {
-                self.total_insertions += 1;
-                self.fitted_in_key += (key.len() <= KEY_SIZE) as usize;
-                let value = DelAddRoaringBitmap::new_del_u32(n);
-                if let Some((key, deladd)) = self.cache.push(key.into(), value) {
-                    self.write_entry(key, deladd)?;
-                }
-            }
-        }
-
-        Ok(())
-    }
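
The `insert_*` methods that follow all share one write-back pattern: repeated keys are merged in the in-memory LRU, and an entry only reaches the sorter when the LRU evicts it. A toy sketch of that pattern under stated assumptions, using only the `roaring` crate; `WriteBackCache` and its `spilled` Vec are hypothetical stand-ins for the LRU and the grenad sorter:

```rust
use std::collections::HashMap;

use roaring::RoaringBitmap;

struct WriteBackCache {
    cap: usize,
    cache: HashMap<Vec<u8>, RoaringBitmap>,
    spilled: Vec<(Vec<u8>, RoaringBitmap)>, // stand-in for the grenad sorter
}

impl WriteBackCache {
    fn new(cap: usize) -> Self {
        Self { cap, cache: HashMap::new(), spilled: Vec::new() }
    }

    fn insert_add_u32(&mut self, key: &[u8], docid: u32) {
        if let Some(bitmap) = self.cache.get_mut(key) {
            bitmap.insert(docid); // hot key: merge in place, no sorter insertion
            return;
        }
        if self.cache.len() == self.cap {
            // The real cache evicts the least recently used entry here;
            // evicting an arbitrary one keeps the sketch short.
            let victim = self.cache.keys().next().cloned().unwrap();
            let bitmap = self.cache.remove(&victim).unwrap();
            self.spilled.push((victim, bitmap));
        }
        self.cache.insert(key.to_vec(), RoaringBitmap::from([docid]));
    }
}

fn main() {
    let mut cache = WriteBackCache::new(2);
    for (key, docid) in [(&b"blue"[..], 0u32), (&b"red"[..], 0), (&b"blue"[..], 1), (&b"green"[..], 2)] {
        cache.insert_add_u32(key, docid);
    }
    println!("spilled {} entries, {} still hot", cache.spilled.len(), cache.cache.len());
}
```

-
-    pub fn insert_del(
-        &mut self,
-        key: &[u8],
-        bitmap: RoaringBitmap,
-    ) -> grenad::Result<(), MF::Error> {
-        match self.cache.get_mut(key) {
-            Some(DelAddRoaringBitmap { del, add: _ }) => {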
*del.get_or_insert_with(RoaringBitmap::default) |= bitmap; - } - None => { - self.total_insertions += 1; - self.fitted_in_key += (key.len() <= KEY_SIZE) as usize; - let value = DelAddRoaringBitmap::new_del(bitmap); - if let Some((key, deladd)) = self.cache.push(key.into(), value) { - self.write_entry(key, deladd)?; - } - } - } - - Ok(()) - } - - pub fn insert_add_u32(&mut self, key: &[u8], n: u32) -> grenad::Result<(), MF::Error> { - match self.cache.get_mut(key) { - Some(DelAddRoaringBitmap { del: _, add }) => { - add.get_or_insert_with(RoaringBitmap::default).insert(n); - } - None => { - self.total_insertions += 1; - self.fitted_in_key += (key.len() <= KEY_SIZE) as usize; - let value = DelAddRoaringBitmap::new_add_u32(n); - if let Some((key, deladd)) = self.cache.push(key.into(), value) { - self.write_entry(key, deladd)?; - } - } - } - - Ok(()) - } - - pub fn insert_add( - &mut self, - key: &[u8], - bitmap: RoaringBitmap, - ) -> grenad::Result<(), MF::Error> { - match self.cache.get_mut(key) { - Some(DelAddRoaringBitmap { del: _, add }) => { - *add.get_or_insert_with(RoaringBitmap::default) |= bitmap; - } - None => { - self.total_insertions += 1; - self.fitted_in_key += (key.len() <= KEY_SIZE) as usize; - let value = DelAddRoaringBitmap::new_add(bitmap); - if let Some((key, deladd)) = self.cache.push(key.into(), value) { - self.write_entry(key, deladd)?; - } - } - } - - Ok(()) - } - - pub fn insert_del_add_u32(&mut self, key: &[u8], n: u32) -> grenad::Result<(), MF::Error> { - match self.cache.get_mut(key) { - Some(DelAddRoaringBitmap { del, add }) => { - del.get_or_insert_with(RoaringBitmap::default).insert(n); - add.get_or_insert_with(RoaringBitmap::default).insert(n); - } - None => { - self.total_insertions += 1; - self.fitted_in_key += (key.len() <= KEY_SIZE) as usize; - let value = DelAddRoaringBitmap::new_del_add_u32(n); - if let Some((key, deladd)) = self.cache.push(key.into(), value) { - self.write_entry(key, deladd)?; - } - } - } - - Ok(()) - } - - fn write_entry>( - &mut self, - key: A, - deladd: DelAddRoaringBitmap, - ) -> grenad::Result<(), MF::Error> { - /// TODO we must create a serialization trait to correctly serialize bitmaps - self.deladd_buffer.clear(); - let mut value_writer = KvWriterDelAdd::new(&mut self.deladd_buffer); - match deladd { - DelAddRoaringBitmap { del: Some(del), add: None } => { - self.cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&del, &mut self.cbo_buffer); - value_writer.insert(DelAdd::Deletion, &self.cbo_buffer)?; - } - DelAddRoaringBitmap { del: None, add: Some(add) } => { - self.cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&add, &mut self.cbo_buffer); - value_writer.insert(DelAdd::Addition, &self.cbo_buffer)?; - } - DelAddRoaringBitmap { del: Some(del), add: Some(add) } => { - self.cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&del, &mut self.cbo_buffer); - value_writer.insert(DelAdd::Deletion, &self.cbo_buffer)?; - - self.cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&add, &mut self.cbo_buffer); - value_writer.insert(DelAdd::Addition, &self.cbo_buffer)?; - } - DelAddRoaringBitmap { del: None, add: None } => return Ok(()), - } - let bytes = value_writer.into_inner().unwrap(); - self.sorter.insert(key, bytes) - } - - pub fn direct_insert(&mut self, key: &[u8], val: &[u8]) -> grenad::Result<(), MF::Error> { - self.sorter.insert(key, val) - } - - pub fn into_sorter(mut self) -> grenad::Result, MF::Error> { - let mut all_n_containers = Vec::new(); - let mut all_n_array_containers = Vec::new(); 
- let mut all_n_bitset_containers = Vec::new(); - let mut all_n_values_array_containers = Vec::new(); - let mut all_n_values_bitset_containers = Vec::new(); - let mut all_cardinality = Vec::new(); - - let default_arc = Lru::new(NonZeroUsize::MIN); - for (key, deladd) in mem::replace(&mut self.cache, default_arc) { - for bitmap in [&deladd.del, &deladd.add].into_iter().flatten() { - let Statistics { - n_containers, - n_array_containers, - n_bitset_containers, - n_values_array_containers, - n_values_bitset_containers, - cardinality, - .. - } = bitmap.statistics(); - all_n_containers.push(n_containers); - all_n_array_containers.push(n_array_containers); - all_n_bitset_containers.push(n_bitset_containers); - all_n_values_array_containers.push(n_values_array_containers); - all_n_values_bitset_containers.push(n_values_bitset_containers); - all_cardinality.push(cardinality as u32); - } - - self.write_entry(key, deladd)?; - } - - let mut output = String::new(); - - for (name, mut slice) in [ - ("n_containers", all_n_containers), - ("n_array_containers", all_n_array_containers), - ("n_bitset_containers", all_n_bitset_containers), - ("n_values_array_containers", all_n_values_array_containers), - ("n_values_bitset_containers", all_n_values_bitset_containers), - ("cardinality", all_cardinality), - ] { - let _ = writeln!(&mut output, "{name} (p100) {:?}", Stats::from_slice(&mut slice)); - // let _ = writeln!(&mut output, "{name} (p99) {:?}", Stats::from_slice_p99(&mut slice)); - } - - let _ = writeln!( - &mut output, - "LruCache stats: {} <= {KEY_SIZE} bytes ({}%) on a total of {} insertions", - self.fitted_in_key, - (self.fitted_in_key as f32 / self.total_insertions as f32) * 100.0, - self.total_insertions, - ); - - eprintln!("{output}"); - - Ok(self.sorter) - } -} - -#[derive(Default, Debug)] -struct Stats { - pub len: usize, - pub average: f32, - pub mean: u32, - pub min: u32, - pub max: u32, -} - -impl Stats { - fn from_slice(slice: &mut [u32]) -> Stats { - slice.sort_unstable(); - Self::from_sorted_slice(slice) - } - - fn from_slice_p99(slice: &mut [u32]) -> Stats { - slice.sort_unstable(); - let new_len = slice.len() - (slice.len() as f32 / 100.0) as usize; - match slice.get(..new_len) { - Some(slice) => Self::from_sorted_slice(slice), - None => Stats::default(), - } - } - - fn from_sorted_slice(slice: &[u32]) -> Stats { - let sum: f64 = slice.iter().map(|i| *i as f64).sum(); - let average = (sum / slice.len() as f64) as f32; - let mean = *slice.len().checked_div(2).and_then(|middle| slice.get(middle)).unwrap_or(&0); - let min = *slice.first().unwrap_or(&0); - let max = *slice.last().unwrap_or(&0); - Stats { len: slice.len(), average, mean, min, max } - } -} - -#[derive(Debug, Clone, Default)] -pub struct DelAddRoaringBitmap { - pub(crate) del: Option, - pub(crate) add: Option, -} - -impl DelAddRoaringBitmap { - fn new_del_add_u32(n: u32) -> Self { - DelAddRoaringBitmap { - del: Some(RoaringBitmap::from([n])), - add: Some(RoaringBitmap::from([n])), - } - } - - fn new_del(bitmap: RoaringBitmap) -> Self { - DelAddRoaringBitmap { del: Some(bitmap), add: None } - } - - fn new_del_u32(n: u32) -> Self { - DelAddRoaringBitmap { del: Some(RoaringBitmap::from([n])), add: None } - } - - fn new_add(bitmap: RoaringBitmap) -> Self { - DelAddRoaringBitmap { del: None, add: Some(bitmap) } - } - - fn new_add_u32(n: u32) -> Self { - DelAddRoaringBitmap { del: None, add: Some(RoaringBitmap::from([n])) } - } -} diff --git a/milli/src/update/new/extract/lru.rs b/milli/src/update/new/extract/lru.rs deleted file mode 
100644
index 3eca47cb2..000000000
--- a/milli/src/update/new/extract/lru.rs
+++ /dev/null
@@ -1,234 +0,0 @@
-use std::borrow::Borrow;
-use std::hash::{BuildHasher, Hash};
-use std::iter::repeat_with;
-use std::mem;
-use std::num::NonZeroUsize;
-
-use hashbrown::hash_map::{DefaultHashBuilder, Entry};
-use hashbrown::HashMap;
-
-#[derive(Debug)]
-pub struct Lru<K, V, S = DefaultHashBuilder> {
-    lookup: HashMap<K, usize, S>,
-    storage: FixedSizeList<LruNode<K, V>>,
-}
-
-impl<K: Eq + Hash, V> Lru<K, V> {
-    /// Creates a new LRU cache that holds at most `capacity` elements.
-    pub fn new(capacity: NonZeroUsize) -> Self {
-        Self { lookup: HashMap::new(), storage: FixedSizeList::new(capacity.get()) }
-    }
-}
-
-impl<K: Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
-    /// Creates a new LRU cache that holds at most `capacity` elements
-    /// and uses the provided hash builder to hash keys.
-    pub fn with_hasher(capacity: NonZeroUsize, hash_builder: S) -> Lru<K, V, S> {
-        Self {
-            lookup: HashMap::with_hasher(hash_builder),
-            storage: FixedSizeList::new(capacity.get()),
-        }
-    }
-}
-
-impl<K: Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
-    /// Returns a mutable reference to the value of the key in the cache or `None` if it is not present in the cache.
-    ///
-    /// Moves the key to the head of the LRU list if it exists.
-    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut V>
-    where
-        K: Borrow<Q>,
-        Q: Hash + Eq + ?Sized,
-    {
-        let idx = *self.lookup.get(key)?;
-        self.storage.move_front(idx).map(|node| &mut node.value)
-    }
-}
-
-impl<K: Clone + Eq + Hash, V, S: BuildHasher> Lru<K, V, S> {
-    pub fn push(&mut self, key: K, value: V) -> Option<(K, V)> {
-        match self.lookup.entry(key) {
-            Entry::Occupied(occ) => {
-                // It's fine to unwrap here because:
-                // * the entry already exists
-                let node = self.storage.move_front(*occ.get()).unwrap();
-                let old_value = mem::replace(&mut node.value, value);
-                let old_key = occ.replace_key();
-                Some((old_key, old_value))
-            }
-            Entry::Vacant(vac) => {
-                let key = vac.key().clone();
-                if self.storage.is_full() {
-                    // It's fine to unwrap here because:
-                    // * the cache capacity is non zero
-                    // * the cache is full
-                    let idx = self.storage.back_idx();
-                    let node = self.storage.move_front(idx).unwrap();
-                    let LruNode { key, value } = mem::replace(node, LruNode { key, value });
-                    vac.insert(idx);
-                    self.lookup.remove(&key);
-                    Some((key, value))
-                } else {
-                    // It's fine to unwrap here because:
-                    // * the cache capacity is non zero
-                    // * the cache is not full
-                    let (idx, _) = self.storage.push_front(LruNode { key, value }).unwrap();
-                    vac.insert(idx);
-                    None
-                }
-            }
-        }
-    }
-}
-
-impl<K, V, S> IntoIterator for Lru<K, V, S> {
-    type Item = (K, V);
-    type IntoIter = IntoIter<K, V>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        IntoIter { lookup_iter: self.lookup.into_iter(), nodes: self.storage.nodes }
-    }
-}
-
-pub struct IntoIter<K, V> {
-    lookup_iter: hashbrown::hash_map::IntoIter<K, usize>,
-    nodes: Box<[Option<FixedSizeListNode<LruNode<K, V>>>]>,
-}
-
-impl<K, V> Iterator for IntoIter<K, V> {
-    type Item = (K, V);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let (_key, idx) = self.lookup_iter.next()?;
-        let LruNode { key, value } = self.nodes.get_mut(idx)?.take()?.data;
-        Some((key, value))
-    }
-}
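
Taken together, `push` and `get_mut` give the eviction behavior the cached sorter relies on: `push` returns the evicted pair once the fixed capacity is reached, which is exactly the hook used to spill merged entries. A quick usage sketch, assuming this module is in scope as `lru`:

```rust
use std::num::NonZeroUsize;

fn main() {
    let mut cache = lru::Lru::new(NonZeroUsize::new(2).unwrap());
    assert!(cache.push("blue", 1).is_none());
    assert!(cache.push("red", 2).is_none());
    *cache.get_mut("blue").unwrap() += 10; // touching "blue" makes "red" the LRU entry
    // Capacity is 2, so pushing a third key evicts the least recently used pair.
    assert_eq!(cache.push("green", 3), Some(("red", 2)));
}
```

-
-#[derive(Debug)]
-struct LruNode<K, V> {
-    key: K,
-    value: V,
-}
-
-#[derive(Debug)]
-struct FixedSizeListNode<T> {
-    prev: usize,
-    next: usize,
-    data: T,
-}
-
-#[derive(Debug)]
-struct FixedSizeList<T> {
-    nodes: Box<[Option<FixedSizeListNode<T>>]>,
-    /// Also corresponds to the first `None` in the nodes.
-    length: usize,
-    // TODO Also, we probably do not need one of the front and back cursors.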
- front: usize, - back: usize, -} - -impl FixedSizeList { - fn new(capacity: usize) -> Self { - Self { - nodes: repeat_with(|| None).take(capacity).collect::>().into_boxed_slice(), - length: 0, - front: usize::MAX, - back: usize::MAX, - } - } - - #[inline] - fn capacity(&self) -> usize { - self.nodes.len() - } - - #[inline] - fn len(&self) -> usize { - self.length - } - - #[inline] - fn is_empty(&self) -> bool { - self.len() == 0 - } - - #[inline] - fn is_full(&self) -> bool { - self.len() == self.capacity() - } - - #[inline] - fn back_idx(&self) -> usize { - self.back - } - - #[inline] - fn next(&mut self) -> Option { - if self.is_full() { - None - } else { - let current_free = self.length; - self.length += 1; - Some(current_free) - } - } - - #[inline] - fn node_mut(&mut self, idx: usize) -> Option<&mut FixedSizeListNode> { - self.nodes.get_mut(idx).and_then(|node| node.as_mut()) - } - - #[inline] - fn node_ref(&self, idx: usize) -> Option<&FixedSizeListNode> { - self.nodes.get(idx).and_then(|node| node.as_ref()) - } - - #[inline] - fn move_front(&mut self, idx: usize) -> Option<&mut T> { - let node = self.nodes.get_mut(idx)?.take()?; - if let Some(prev) = self.node_mut(node.prev) { - prev.next = node.next; - } else { - self.front = node.next; - } - if let Some(next) = self.node_mut(node.next) { - next.prev = node.prev; - } else { - self.back = node.prev; - } - - if let Some(front) = self.node_mut(self.front) { - front.prev = idx; - } - if self.node_ref(self.back).is_none() { - self.back = idx; - } - - let node = self.nodes.get_mut(idx).unwrap().insert(FixedSizeListNode { - prev: usize::MAX, - next: self.front, - data: node.data, - }); - self.front = idx; - Some(&mut node.data) - } - - #[inline] - fn push_front(&mut self, data: T) -> Option<(usize, &mut T)> { - let idx = self.next()?; - if let Some(front) = self.node_mut(self.front) { - front.prev = idx; - } - if self.node_ref(self.back).is_none() { - self.back = idx; - } - let node = self.nodes.get_mut(idx).unwrap().insert(FixedSizeListNode { - prev: usize::MAX, - next: self.front, - data, - }); - self.front = idx; - Some((idx, &mut node.data)) - } -} diff --git a/milli/src/update/new/extract/searchable/extract_word_docids.rs b/milli/src/update/new/extract/searchable/extract_word_docids.rs deleted file mode 100644 index b9e4803c7..000000000 --- a/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ /dev/null @@ -1,533 +0,0 @@ -use std::cell::RefCell; -use std::collections::HashMap; -use std::fs::File; -use std::num::NonZero; -use std::ops::DerefMut as _; - -use bumpalo::Bump; -use grenad::{Merger, MergerBuilder}; -use heed::RoTxn; - -use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; -use crate::update::new::extract::cache::CboCachedSorter; -use crate::update::new::extract::perm_json_p::contained_in; -use crate::update::new::indexer::document_changes::{ - extract, DocumentChangeContext, DocumentChanges, Extractor, FullySend, IndexingContext, - Progress, RefCellExt, ThreadLocal, -}; -use crate::update::new::DocumentChange; -use crate::update::{create_sorter, GrenadParameters, MergeDeladdCboRoaringBitmaps}; -use crate::{bucketed_position, DocumentId, FieldId, Index, Result, MAX_POSITION_PER_ATTRIBUTE}; - -const MAX_COUNTED_WORDS: usize = 30; - -pub struct WordDocidsCachedSorters { - word_fid_docids: CboCachedSorter, - word_docids: CboCachedSorter, - exact_word_docids: CboCachedSorter, - word_position_docids: CboCachedSorter, - fid_word_count_docids: CboCachedSorter, - fid_word_count: HashMap, - 
current_docid: Option, -} - -impl WordDocidsCachedSorters { - pub fn new( - indexer: GrenadParameters, - max_memory: Option, - capacity: NonZero, - ) -> Self { - let max_memory = max_memory.map(|max_memory| max_memory / 4); - - let word_fid_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); - let word_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); - let exact_word_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); - let word_position_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); - let fid_word_count_docids = CboCachedSorter::new( - capacity, - create_sorter( - grenad::SortAlgorithm::Stable, - MergeDeladdCboRoaringBitmaps, - indexer.chunk_compression_type, - indexer.chunk_compression_level, - indexer.max_nb_chunks, - max_memory, - false, - ), - ); - - Self { - word_fid_docids, - word_docids, - exact_word_docids, - word_position_docids, - fid_word_count_docids, - fid_word_count: HashMap::new(), - current_docid: None, - } - } - - fn insert_add_u32( - &mut self, - field_id: FieldId, - position: u16, - word: &str, - exact: bool, - docid: u32, - buffer: &mut Vec, - ) -> Result<()> { - let key = word.as_bytes(); - if exact { - self.exact_word_docids.insert_add_u32(key, docid)?; - } else { - self.word_docids.insert_add_u32(key, docid)?; - } - - buffer.clear(); - buffer.extend_from_slice(word.as_bytes()); - buffer.push(0); - buffer.extend_from_slice(&field_id.to_be_bytes()); - self.word_fid_docids.insert_add_u32(buffer, docid)?; - - let position = bucketed_position(position); - buffer.clear(); - buffer.extend_from_slice(word.as_bytes()); - buffer.push(0); - buffer.extend_from_slice(&position.to_be_bytes()); - self.word_position_docids.insert_add_u32(buffer, docid)?; - - if self.current_docid.map_or(false, |id| docid != id) { - self.flush_fid_word_count(buffer)?; - } - - self.fid_word_count - .entry(field_id) - .and_modify(|(_current_count, new_count)| *new_count += 1) - .or_insert((0, 1)); - self.current_docid = Some(docid); - - Ok(()) - } - - fn insert_del_u32( - &mut self, - field_id: FieldId, - position: u16, - word: &str, - exact: bool, - docid: u32, - buffer: &mut Vec, - ) -> Result<()> { - let key = word.as_bytes(); - if exact { - self.exact_word_docids.insert_del_u32(key, docid)?; - } else { - self.word_docids.insert_del_u32(key, docid)?; - } - - buffer.clear(); - buffer.extend_from_slice(word.as_bytes()); - buffer.push(0); - buffer.extend_from_slice(&field_id.to_be_bytes()); - self.word_fid_docids.insert_del_u32(buffer, docid)?; - - let position = bucketed_position(position); - buffer.clear(); - buffer.extend_from_slice(word.as_bytes()); - buffer.push(0); - buffer.extend_from_slice(&position.to_be_bytes()); - self.word_position_docids.insert_del_u32(buffer, docid)?; - - if 
self.current_docid.map_or(false, |id| docid != id) { - self.flush_fid_word_count(buffer)?; - } - - self.fid_word_count - .entry(field_id) - .and_modify(|(current_count, _new_count)| *current_count += 1) - .or_insert((1, 0)); - self.current_docid = Some(docid); - - Ok(()) - } - - fn flush_fid_word_count(&mut self, buffer: &mut Vec) -> Result<()> { - for (fid, (current_count, new_count)) in self.fid_word_count.drain() { - if current_count != new_count { - if current_count <= MAX_COUNTED_WORDS { - buffer.clear(); - buffer.extend_from_slice(&fid.to_be_bytes()); - buffer.push(current_count as u8); - self.fid_word_count_docids - .insert_del_u32(buffer, self.current_docid.unwrap())?; - } - if new_count <= MAX_COUNTED_WORDS { - buffer.clear(); - buffer.extend_from_slice(&fid.to_be_bytes()); - buffer.push(new_count as u8); - self.fid_word_count_docids - .insert_add_u32(buffer, self.current_docid.unwrap())?; - } - } - } - - Ok(()) - } -} - -struct WordDocidsMergerBuilders { - word_fid_docids: MergerBuilder, - word_docids: MergerBuilder, - exact_word_docids: MergerBuilder, - word_position_docids: MergerBuilder, - fid_word_count_docids: MergerBuilder, -} - -pub struct WordDocidsMergers { - pub word_fid_docids: Merger, - pub word_docids: Merger, - pub exact_word_docids: Merger, - pub word_position_docids: Merger, - pub fid_word_count_docids: Merger, -} - -impl WordDocidsMergerBuilders { - fn new() -> Self { - Self { - word_fid_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), - word_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), - exact_word_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), - word_position_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), - fid_word_count_docids: MergerBuilder::new(MergeDeladdCboRoaringBitmaps), - } - } - - fn add_sorters(&mut self, other: WordDocidsCachedSorters) -> Result<()> { - let WordDocidsCachedSorters { - word_fid_docids, - word_docids, - exact_word_docids, - word_position_docids, - fid_word_count_docids, - fid_word_count: _, - current_docid: _, - } = other; - - let mut word_fid_docids_readers = Ok(vec![]); - let mut word_docids_readers = Ok(vec![]); - let mut exact_word_docids_readers = Ok(vec![]); - let mut word_position_docids_readers = Ok(vec![]); - let mut fid_word_count_docids_readers = Ok(vec![]); - rayon::scope(|s| { - s.spawn(|_| { - word_fid_docids_readers = - word_fid_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - s.spawn(|_| { - word_docids_readers = - word_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - s.spawn(|_| { - exact_word_docids_readers = - exact_word_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - s.spawn(|_| { - word_position_docids_readers = - word_position_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - s.spawn(|_| { - fid_word_count_docids_readers = - fid_word_count_docids.into_sorter().and_then(|s| s.into_reader_cursors()); - }); - }); - self.word_fid_docids.extend(word_fid_docids_readers?); - self.word_docids.extend(word_docids_readers?); - self.exact_word_docids.extend(exact_word_docids_readers?); - self.word_position_docids.extend(word_position_docids_readers?); - self.fid_word_count_docids.extend(fid_word_count_docids_readers?); - - Ok(()) - } - - fn build(self) -> WordDocidsMergers { - WordDocidsMergers { - word_fid_docids: self.word_fid_docids.build(), - word_docids: self.word_docids.build(), - exact_word_docids: self.exact_word_docids.build(), - word_position_docids: self.word_position_docids.build(), - 
fid_word_count_docids: self.fid_word_count_docids.build(), - } - } -} - -pub struct WordDocidsExtractorData<'extractor> { - tokenizer: &'extractor DocumentTokenizer<'extractor>, - grenad_parameters: GrenadParameters, - max_memory: Option, -} - -impl<'extractor> Extractor<'extractor> for WordDocidsExtractorData<'extractor> { - type Data = FullySend>; - - fn init_data( - &self, - _extractor_alloc: raw_collections::alloc::RefBump<'extractor>, - ) -> Result { - Ok(FullySend(RefCell::new(WordDocidsCachedSorters::new( - self.grenad_parameters, - self.max_memory, - // TODO use a better value - 200_000.try_into().unwrap(), - )))) - } - - fn process<'doc>( - &self, - changes: impl Iterator>>, - context: &DocumentChangeContext, - ) -> Result<()> { - for change in changes { - let change = change?; - WordDocidsExtractors::extract_document_change(context, self.tokenizer, change)?; - } - Ok(()) - } -} - -pub struct WordDocidsExtractors; - -impl WordDocidsExtractors { - pub fn run_extraction<'pl, 'fid, 'indexer, 'index, DC: DocumentChanges<'pl>, MSP, SP>( - grenad_parameters: GrenadParameters, - document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, - extractor_allocs: &mut ThreadLocal>>, - finished_steps: u16, - total_steps: u16, - step_name: &'static str, - ) -> Result - where - MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, - { - let max_memory = grenad_parameters.max_memory_by_thread(); - let index = indexing_context.index; - - let rtxn = index.read_txn()?; - let stop_words = index.stop_words(&rtxn)?; - let allowed_separators = index.allowed_separators(&rtxn)?; - let allowed_separators: Option> = - allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect()); - let dictionary = index.dictionary(&rtxn)?; - let dictionary: Option> = - dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); - let builder = tokenizer_builder( - stop_words.as_ref(), - allowed_separators.as_deref(), - dictionary.as_deref(), - ); - let tokenizer = builder.into_tokenizer(); - - let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?; - let attributes_to_skip = Self::attributes_to_skip(&rtxn, index)?; - let localized_attributes_rules = - index.localized_attributes_rules(&rtxn)?.unwrap_or_default(); - - let document_tokenizer = DocumentTokenizer { - tokenizer: &tokenizer, - attribute_to_extract: attributes_to_extract.as_deref(), - attribute_to_skip: attributes_to_skip.as_slice(), - localized_attributes_rules: &localized_attributes_rules, - max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE, - }; - - let datastore = ThreadLocal::new(); - - { - let span = - tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction"); - let _entered = span.enter(); - - let extractor = WordDocidsExtractorData { - tokenizer: &document_tokenizer, - grenad_parameters, - max_memory, - }; - - extract( - document_changes, - &extractor, - indexing_context, - extractor_allocs, - &datastore, - finished_steps, - total_steps, - step_name, - )?; - } - - { - let span = - tracing::trace_span!(target: "indexing::documents::extract", "merger_building"); - let _entered = span.enter(); - let mut builder = WordDocidsMergerBuilders::new(); - for cache in datastore.into_iter().map(|cache| cache.0.into_inner()) { - builder.add_sorters(cache)?; - } - - Ok(builder.build()) - } - } - - fn extract_document_change( - context: &DocumentChangeContext>>, - document_tokenizer: &DocumentTokenizer, - document_change: DocumentChange, - ) -> Result<()> { - let index = 
&context.index; - let rtxn = &context.txn; - let mut cached_sorter = context.data.0.borrow_mut_or_yield(); - let cached_sorter = cached_sorter.deref_mut(); - let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); - let new_fields_ids_map = new_fields_ids_map.deref_mut(); - - let exact_attributes = index.exact_attributes(rtxn)?; - let is_exact_attribute = - |fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr)); - let mut buffer = Vec::new(); - match document_change { - DocumentChange::Deletion(inner) => { - let mut token_fn = |fname: &str, fid, pos, word: &str| { - cached_sorter - .insert_del_u32( - fid, - pos, - word, - is_exact_attribute(fname), - inner.docid(), - &mut buffer, - ) - .map_err(crate::Error::from) - }; - document_tokenizer.tokenize_document( - inner.current(rtxn, index, context.db_fields_ids_map)?, - new_fields_ids_map, - &mut token_fn, - )?; - } - DocumentChange::Update(inner) => { - let mut token_fn = |fname: &str, fid, pos, word: &str| { - cached_sorter - .insert_del_u32( - fid, - pos, - word, - is_exact_attribute(fname), - inner.docid(), - &mut buffer, - ) - .map_err(crate::Error::from) - }; - document_tokenizer.tokenize_document( - inner.current(rtxn, index, context.db_fields_ids_map)?, - new_fields_ids_map, - &mut token_fn, - )?; - - let mut token_fn = |fname: &str, fid, pos, word: &str| { - cached_sorter - .insert_add_u32( - fid, - pos, - word, - is_exact_attribute(fname), - inner.docid(), - &mut buffer, - ) - .map_err(crate::Error::from) - }; - document_tokenizer.tokenize_document( - inner.merged(rtxn, index, context.db_fields_ids_map)?, - new_fields_ids_map, - &mut token_fn, - )?; - } - DocumentChange::Insertion(inner) => { - let mut token_fn = |fname: &str, fid, pos, word: &str| { - cached_sorter - .insert_add_u32( - fid, - pos, - word, - is_exact_attribute(fname), - inner.docid(), - &mut buffer, - ) - .map_err(crate::Error::from) - }; - document_tokenizer.tokenize_document( - inner.inserted(), - new_fields_ids_map, - &mut token_fn, - )?; - } - } - - cached_sorter.flush_fid_word_count(&mut buffer) - } - - fn attributes_to_extract<'a>( - rtxn: &'a RoTxn, - index: &'a Index, - ) -> Result>> { - index.user_defined_searchable_fields(rtxn).map_err(Into::into) - } - - fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result> { - Ok(vec![]) - } -} diff --git a/milli/src/update/new/merger.rs b/milli/src/update/new/merger.rs deleted file mode 100644 index 14e947686..000000000 --- a/milli/src/update/new/merger.rs +++ /dev/null @@ -1,448 +0,0 @@ -use std::fs::File; -use std::io::{self}; - -use bincode::ErrorKind; -use grenad::Merger; -use hashbrown::HashSet; -use heed::types::Bytes; -use heed::{Database, RoTxn}; -use roaring::RoaringBitmap; - -use super::channel::*; -use super::extract::FacetKind; -use super::word_fst_builder::{PrefixData, PrefixDelta}; -use super::{Deletion, DocumentChange, KvReaderDelAdd, KvReaderFieldId}; -use crate::update::del_add::DelAdd; -use crate::update::new::channel::MergerOperation; -use crate::update::new::word_fst_builder::WordFstBuilder; -use crate::update::MergeDeladdCboRoaringBitmaps; -use crate::{CboRoaringBitmapCodec, Error, FieldId, GeoPoint, GlobalFieldsIdsMap, Index, Result}; - -/// TODO We must return some infos/stats -#[tracing::instrument(level = "trace", skip_all, target = "indexing::documents", name = "merge")] -pub fn merge_grenad_entries( - receiver: MergerReceiver, - sender: MergerSender, - rtxn: &RoTxn, - index: &Index, - global_fields_ids_map: GlobalFieldsIdsMap<'_>, 
-) -> Result { - let mut buffer: Vec = Vec::new(); - let mut documents_ids = index.documents_ids(rtxn)?; - let mut geo_extractor = GeoExtractor::new(rtxn, index)?; - let mut merger_result = MergerResult::default(); - - for merger_operation in receiver { - match merger_operation { - MergerOperation::ExactWordDocidsMerger(merger) => { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); - let _entered = span.enter(); - merge_and_send_docids( - merger, - /// TODO do a MergerOperation::database(&Index) -> Database. - index.exact_word_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::FidWordCountDocidsMerger(merger) => { - let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); - let _entered = span.enter(); - merge_and_send_docids( - merger, - index.field_id_word_count_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::WordDocidsMerger(merger) => { - let words_fst = index.words_fst(rtxn)?; - let mut word_fst_builder = WordFstBuilder::new(&words_fst)?; - let prefix_settings = index.prefix_settings(rtxn)?; - word_fst_builder.with_prefix_settings(prefix_settings); - - { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); - let _entered = span.enter(); - - merge_and_send_docids( - merger, - index.word_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |deladd, key| word_fst_builder.register_word(deladd, key), - )?; - } - - { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "words_fst"); - let _entered = span.enter(); - - let (word_fst_mmap, prefix_data) = word_fst_builder.build(index, rtxn)?; - sender.main().write_words_fst(word_fst_mmap).unwrap(); - if let Some(PrefixData { prefixes_fst_mmap, prefix_delta }) = prefix_data { - sender.main().write_words_prefixes_fst(prefixes_fst_mmap).unwrap(); - merger_result.prefix_delta = Some(prefix_delta); - } - } - } - MergerOperation::WordFidDocidsMerger(merger) => { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); - let _entered = span.enter(); - merge_and_send_docids( - merger, - index.word_fid_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::WordPairProximityDocidsMerger(merger) => { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids"); - let _entered = span.enter(); - merge_and_send_docids( - merger, - index.word_pair_proximity_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::WordPositionDocidsMerger(merger) => { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); - let _entered = span.enter(); - merge_and_send_docids( - merger, - index.word_position_docids.remap_types(), - rtxn, - &mut buffer, - sender.docids::(), - |_, _key| Ok(()), - )?; - } - MergerOperation::InsertDocument { docid, external_id, document } => { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "insert_document"); - let _entered = span.enter(); - documents_ids.insert(docid); - sender.documents().uncompressed(docid, external_id.clone(), &document).unwrap(); - - if let Some(geo_extractor) = geo_extractor.as_mut() { - let current = index.documents.remap_data_type::().get(rtxn, &docid)?; - let current: 
Option<&KvReaderFieldId> = current.map(Into::into); - let change = match current { - Some(current) => DocumentChange::Update(todo!()), - None => DocumentChange::Insertion(todo!()), - }; - geo_extractor.manage_change(&mut global_fields_ids_map, &change)?; - } - } - MergerOperation::DeleteDocument { docid, external_id } => { - /// TODO: delete vectors - let span = - tracing::trace_span!(target: "indexing::documents::merge", "delete_document"); - let _entered = span.enter(); - if !documents_ids.remove(docid) { - unreachable!("Tried deleting a document that we do not know about"); - } - sender.documents().delete(docid, external_id.clone()).unwrap(); - - if let Some(geo_extractor) = geo_extractor.as_mut() { - let change = DocumentChange::Deletion(Deletion::create(docid, todo!())); - geo_extractor.manage_change(&mut global_fields_ids_map, &change)?; - } - } - MergerOperation::FinishedDocument => { - // send the rtree - } - MergerOperation::FacetDocidsMerger(merger) => { - let span = - tracing::trace_span!(target: "indexing::documents::merge", "facet_docids"); - let _entered = span.enter(); - let mut facet_field_ids_delta = FacetFieldIdsDelta::new(); - merge_and_send_facet_docids( - merger, - FacetDatabases::new(index), - rtxn, - &mut buffer, - sender.facet_docids(), - &mut facet_field_ids_delta, - )?; - - merger_result.facet_field_ids_delta = Some(facet_field_ids_delta); - } - } - } - - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "documents_ids"); - let _entered = span.enter(); - - // Send the documents ids unionized with the current one - sender.send_documents_ids(documents_ids).unwrap(); - } - - // ... - - Ok(merger_result) -} - -#[derive(Default, Debug)] -pub struct MergerResult { - /// The delta of the prefixes - pub prefix_delta: Option, - /// The field ids that have been modified - pub facet_field_ids_delta: Option, -} - -pub struct GeoExtractor { - rtree: Option>, -} - -impl GeoExtractor { - pub fn new(rtxn: &RoTxn, index: &Index) -> Result> { - let is_sortable = index.sortable_fields(rtxn)?.contains("_geo"); - let is_filterable = index.filterable_fields(rtxn)?.contains("_geo"); - if is_sortable || is_filterable { - Ok(Some(GeoExtractor { rtree: index.geo_rtree(rtxn)? })) - } else { - Ok(None) - } - } - - pub fn manage_change( - &mut self, - fidmap: &mut GlobalFieldsIdsMap, - change: &DocumentChange, - ) -> Result<()> { - match change { - DocumentChange::Deletion(_) => todo!(), - DocumentChange::Update(_) => todo!(), - DocumentChange::Insertion(_) => todo!(), - } - } - - pub fn serialize_rtree(self, writer: &mut W) -> Result { - match self.rtree { - Some(rtree) => { - // TODO What should I do? 
- bincode::serialize_into(writer, &rtree).map(|_| true).map_err(|e| match *e { - ErrorKind::Io(e) => Error::IoError(e), - ErrorKind::InvalidUtf8Encoding(_) => todo!(), - ErrorKind::InvalidBoolEncoding(_) => todo!(), - ErrorKind::InvalidCharEncoding => todo!(), - ErrorKind::InvalidTagEncoding(_) => todo!(), - ErrorKind::DeserializeAnyNotSupported => todo!(), - ErrorKind::SizeLimit => todo!(), - ErrorKind::SequenceMustHaveLength => todo!(), - ErrorKind::Custom(_) => todo!(), - }) - } - None => Ok(false), - } - } -} - -#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] -fn merge_and_send_docids( - merger: Merger, - database: Database, - rtxn: &RoTxn<'_>, - buffer: &mut Vec, - docids_sender: impl DocidsSender, - mut register_key: impl FnMut(DelAdd, &[u8]) -> Result<()>, -) -> Result<()> { - let mut merger_iter = merger.into_stream_merger_iter().unwrap(); - while let Some((key, deladd)) = merger_iter.next().unwrap() { - let current = database.get(rtxn, key)?; - let deladd: &KvReaderDelAdd = deladd.into(); - let del = deladd.get(DelAdd::Deletion); - let add = deladd.get(DelAdd::Addition); - - match merge_cbo_bitmaps(current, del, add)? { - Operation::Write(bitmap) => { - let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer); - docids_sender.write(key, value).unwrap(); - register_key(DelAdd::Addition, key)?; - } - Operation::Delete => { - docids_sender.delete(key).unwrap(); - register_key(DelAdd::Deletion, key)?; - } - Operation::Ignore => (), - } - } - - Ok(()) -} - -#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] -fn merge_and_send_facet_docids( - merger: Merger, - database: FacetDatabases, - rtxn: &RoTxn<'_>, - buffer: &mut Vec, - docids_sender: impl DocidsSender, - facet_field_ids_delta: &mut FacetFieldIdsDelta, -) -> Result<()> { - let mut merger_iter = merger.into_stream_merger_iter().unwrap(); - while let Some((key, deladd)) = merger_iter.next().unwrap() { - let current = database.get_cbo_roaring_bytes_value(rtxn, key)?; - let deladd: &KvReaderDelAdd = deladd.into(); - let del = deladd.get(DelAdd::Deletion); - let add = deladd.get(DelAdd::Addition); - - match merge_cbo_bitmaps(current, del, add)? 
{ - Operation::Write(bitmap) => { - facet_field_ids_delta.register_from_key(key); - let value = cbo_bitmap_serialize_into_vec(&bitmap, buffer); - docids_sender.write(key, value).unwrap(); - } - Operation::Delete => { - facet_field_ids_delta.register_from_key(key); - docids_sender.delete(key).unwrap(); - } - Operation::Ignore => (), - } - } - - Ok(()) -} - -struct FacetDatabases<'a> { - index: &'a Index, -} - -impl<'a> FacetDatabases<'a> { - fn new(index: &'a Index) -> Self { - Self { index } - } - - fn get_cbo_roaring_bytes_value<'t>( - &self, - rtxn: &'t RoTxn<'_>, - key: &[u8], - ) -> heed::Result> { - let (facet_kind, key) = FacetKind::extract_from_key(key); - - let value = - super::channel::Database::from(facet_kind).database(self.index).get(rtxn, key)?; - match facet_kind { - // skip level group size - FacetKind::String | FacetKind::Number => Ok(value.map(|v| &v[1..])), - _ => Ok(value), - } - } -} - -#[derive(Debug)] -pub struct FacetFieldIdsDelta { - /// The field ids that have been modified - modified_facet_string_ids: HashSet, - modified_facet_number_ids: HashSet, -} - -impl FacetFieldIdsDelta { - fn new() -> Self { - Self { - modified_facet_string_ids: HashSet::new(), - modified_facet_number_ids: HashSet::new(), - } - } - - fn register_facet_string_id(&mut self, field_id: FieldId) { - self.modified_facet_string_ids.insert(field_id); - } - - fn register_facet_number_id(&mut self, field_id: FieldId) { - self.modified_facet_number_ids.insert(field_id); - } - - fn register_from_key(&mut self, key: &[u8]) { - let (facet_kind, field_id) = self.extract_key_data(key); - match facet_kind { - FacetKind::Number => self.register_facet_number_id(field_id), - FacetKind::String => self.register_facet_string_id(field_id), - _ => (), - } - } - - fn extract_key_data(&self, key: &[u8]) -> (FacetKind, FieldId) { - let facet_kind = FacetKind::from(key[0]); - let field_id = FieldId::from_be_bytes([key[1], key[2]]); - (facet_kind, field_id) - } - - pub fn modified_facet_string_ids(&self) -> Option> { - if self.modified_facet_string_ids.is_empty() { - None - } else { - Some(self.modified_facet_string_ids.iter().copied().collect()) - } - } - - pub fn modified_facet_number_ids(&self) -> Option> { - if self.modified_facet_number_ids.is_empty() { - None - } else { - Some(self.modified_facet_number_ids.iter().copied().collect()) - } - } -} - -enum Operation { - Write(RoaringBitmap), - Delete, - Ignore, -} - -/// A function that merges the DelAdd CboRoaringBitmaps with the current bitmap. 
-fn merge_cbo_bitmaps(
-    current: Option<&[u8]>,
-    del: Option<&[u8]>,
-    add: Option<&[u8]>,
-) -> Result<Operation> {
-    let current = current.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
-    let del = del.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
-    let add = add.map(CboRoaringBitmapCodec::deserialize_from).transpose()?;
-
-    match (current, del, add) {
-        (None, None, None) => Ok(Operation::Ignore), // but it's strange
-        (None, None, Some(add)) => Ok(Operation::Write(add)),
-        (None, Some(_del), None) => Ok(Operation::Ignore), // but it's strange
-        (None, Some(_del), Some(add)) => Ok(Operation::Write(add)),
-        (Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
-        (Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
-        (Some(current), Some(del), add) => {
-            let output = match add {
-                Some(add) => (&current - del) | add,
-                None => &current - del,
-            };
-            if output.is_empty() {
-                Ok(Operation::Delete)
-            } else if current == output {
-                Ok(Operation::Ignore)
-            } else {
-                Ok(Operation::Write(output))
-            }
-        }
-    }
-}
-
-/// TODO Return the slice directly from the serialize_into method
-fn cbo_bitmap_serialize_into_vec<'b>(bitmap: &RoaringBitmap, buffer: &'b mut Vec<u8>) -> &'b [u8] {
-    buffer.clear();
-    CboRoaringBitmapCodec::serialize_into(bitmap, buffer);
-    buffer.as_slice()
-}
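
The truth table in `merge_cbo_bitmaps` is easy to sanity-check in isolation: the new value is `(current - del) | add`, and the three outcomes map to Write, Delete, and Ignore. A standalone check, assuming only the `roaring` crate:

```rust
use roaring::RoaringBitmap;

fn main() {
    let current = RoaringBitmap::from([1u32, 2, 3]);

    // Delete one docid, add another: Operation::Write((current - del) | add).
    let output = (&current - &RoaringBitmap::from([2u32])) | &RoaringBitmap::from([4u32]);
    assert_eq!(output, RoaringBitmap::from([1u32, 3, 4]));

    // Everything deleted: the result is empty, so Operation::Delete.
    assert!((&current - &current).is_empty());

    // No effective change: the result equals `current`, so Operation::Ignore.
    let unchanged = (&current - &RoaringBitmap::from([9u32])) | &RoaringBitmap::from([1u32]);
    assert_eq!(unchanged, current);
}
```

diff --git a/milli/src/update/new/parallel_iterator_ext.rs b/milli/src/update/new/parallel_iterator_ext.rs
deleted file mode 100644
index 043457cfd..000000000
--- a/milli/src/update/new/parallel_iterator_ext.rs
+++ /dev/null
@@ -1,74 +0,0 @@
-use std::sync::Arc;
-
-use rayon::iter::{MapInit, ParallelIterator};
-
-pub trait ParallelIteratorExt: ParallelIterator {
-    /// Maps items based on the init function.
-    ///
-    /// The init function is run only as necessary, which is basically once per thread.
-    fn try_map_try_init<T, INIT, F, E, R>(
-        self,
-        init: INIT,
-        map_op: F,
-    ) -> MapInit<
-        Self,
-        impl Fn() -> Result<T, Arc<E>> + Sync + Send + Clone,
-        impl Fn(&mut Result<T, Arc<E>>, Self::Item) -> Result<R, Arc<E>> + Sync + Send + Clone,
-    >
-    where
-        E: Send + Sync,
-        F: Fn(&mut T, Self::Item) -> Result<R, E> + Sync + Send + Clone,
-        INIT: Fn() -> Result<T, E> + Sync + Send + Clone,
-        R: Send,
-    {
-        self.map_init(
-            move || match init() {
-                Ok(t) => Ok(t),
-                Err(err) => Err(Arc::new(err)),
-            },
-            move |result, item| match result {
-                Ok(t) => map_op(t, item).map_err(Arc::new),
-                Err(err) => Err(err.clone()),
-            },
-        )
-    }
-
-    /// A method to run a closure on all the items and return an owned error.
-    ///
-    /// The init function is run only as necessary, which is basically once per thread.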
-    fn try_arc_for_each_try_init<T, INIT, F, E>(self, init: INIT, op: F) -> Result<(), E>
-    where
-        E: Send + Sync,
-        F: Fn(&mut T, Self::Item) -> Result<(), Arc<E>> + Sync + Send + Clone,
-        INIT: Fn() -> Result<T, E> + Sync + Send + Clone,
-    {
-        let result = self.try_for_each_init(
-            move || match init() {
-                Ok(t) => Ok(t),
-                Err(err) => Err(Arc::new(err)),
-            },
-            move |result, item| match result {
-                Ok(t) => op(t, item),
-                Err(err) => Err(err.clone()),
-            },
-        );
-
-        match result {
-            Ok(()) => Ok(()),
-            Err(err) => Err(Arc::into_inner(err).expect("the error must be only owned by us")),
-        }
-    }
-
-    fn try_arc_for_each<F, E>(self, op: F) -> Result<(), E>
-    where
-        E: Send + Sync,
-        F: Fn(Self::Item) -> Result<(), Arc<E>> + Sync + Send + Clone,
-    {
-        match self.try_for_each(op) {
-            Ok(()) => Ok(()),
-            Err(err) => Err(Arc::into_inner(err).expect("the error must be only owned by us")),
-        }
-    }
-}
-
-impl<T: ParallelIterator> ParallelIteratorExt for T {}
diff --git a/workloads/search/embeddings-movies-subset-hf.json b/workloads/search/embeddings-movies-subset-hf.json
index aeeecac59..36f45cfb9 100644
--- a/workloads/search/embeddings-movies-subset-hf.json
+++ b/workloads/search/embeddings-movies-subset-hf.json
@@ -77,7 +77,8 @@
         "q": "puppy cute comforting movie",
         "limit": 100,
         "hybrid": {
-          "semanticRatio": 0.1
+          "semanticRatio": 0.1,
+          "embedder": "default"
         }
       }
     },
@@ -91,7 +92,8 @@
         "q": "puppy cute comforting movie",
         "limit": 100,
         "hybrid": {
-          "semanticRatio": 0.5
+          "semanticRatio": 0.5,
+          "embedder": "default"
         }
       }
     },
@@ -105,7 +107,8 @@
         "q": "puppy cute comforting movie",
         "limit": 100,
         "hybrid": {
-          "semanticRatio": 0.9
+          "semanticRatio": 0.9,
+          "embedder": "default"
         }
       }
     },
@@ -119,7 +122,8 @@
         "q": "puppy cute comforting movie",
         "limit": 100,
         "hybrid": {
-          "semanticRatio": 1.0
+          "semanticRatio": 1.0,
+          "embedder": "default"
        }
      }
    },
@@ -133,7 +137,8 @@
         "q": "shrek",
         "limit": 100,
         "hybrid": {
-          "semanticRatio": 1.0
+          "semanticRatio": 1.0,
+          "embedder": "default"
         }
       }
     },
@@ -147,7 +152,8 @@
         "q": "shrek",
         "limit": 100,
         "hybrid": {
-          "semanticRatio": 0.5
+          "semanticRatio": 0.5,
+          "embedder": "default"
         }
       }
     },
@@ -161,7 +167,8 @@
         "q": "shrek",
         "limit": 100,
         "hybrid": {
-          "semanticRatio": 0.1
+          "semanticRatio": 0.1,
+          "embedder": "default"
         }
       }
     },
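
The workload updates that close the diff all make the same change: hybrid queries now name their embedder explicitly instead of relying on an implicit default. A minimal sketch of the resulting search payload, assuming `serde_json`; the `"default"` embedder name comes from the workload itself:

```rust
use serde_json::json;

fn main() {
    // Shape of one hybrid search body after this change.
    let body = json!({
        "q": "puppy cute comforting movie",
        "limit": 100,
        "hybrid": {
            "semanticRatio": 0.5,
            // Previously implicit; now passed alongside semanticRatio.
            "embedder": "default"
        }
    });
    println!("{}", serde_json::to_string_pretty(&body).unwrap());
}
```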