diff --git a/.dockerignore b/.dockerignore index ef2e39bfc..8c6bdbdeb 100644 --- a/.dockerignore +++ b/.dockerignore @@ -2,4 +2,3 @@ target Dockerfile .dockerignore .gitignore -**/.git diff --git a/.github/scripts/check-release.sh b/.github/scripts/check-release.sh index b0b272226..d9cb128b5 100644 --- a/.github/scripts/check-release.sh +++ b/.github/scripts/check-release.sh @@ -1,24 +1,41 @@ -#!/bin/bash +#!/usr/bin/env bash +set -eu -o pipefail -# check_tag $current_tag $file_tag $file_name -function check_tag { - if [[ "$1" != "$2" ]]; then - echo "Error: the current tag does not match the version in Cargo.toml: found $2 - expected $1" - ret=1 - fi +check_tag() { + local expected=$1 + local actual=$2 + local filename=$3 + + if [[ $actual != $expected ]]; then + echo >&2 "Error: the current tag does not match the version in $filename: found $actual, expected $expected" + return 1 + fi } +read_version() { + grep '^version = ' | cut -d \" -f 2 +} + +if [[ -z "${GITHUB_REF:-}" ]]; then + echo >&2 "Error: GITHUB_REF is not set" + exit 1 +fi + +if [[ ! "$GITHUB_REF" =~ ^refs/tags/v[0-9]+\.[0-9]+\.[0-9]+(-[a-z0-9]+)?$ ]]; then + echo >&2 "Error: GITHUB_REF is not a valid tag: $GITHUB_REF" + exit 1 +fi + +current_tag=${GITHUB_REF#refs/tags/v} ret=0 -current_tag=${GITHUB_REF#'refs/tags/v'} -file_tag="$(grep '^version = ' Cargo.toml | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')" -check_tag $current_tag $file_tag +toml_tag="$(cat Cargo.toml | read_version)" +check_tag "$current_tag" "$toml_tag" Cargo.toml || ret=1 -lock_file='Cargo.lock' -lock_tag=$(grep -A 1 'name = "meilisearch-auth"' $lock_file | grep version | cut -d '=' -f 2 | tr -d '"' | tr -d ' ') -check_tag $current_tag $lock_tag $lock_file +lock_tag=$(grep -A 1 '^name = "meilisearch-auth"' Cargo.lock | read_version) +check_tag "$current_tag" "$lock_tag" Cargo.lock || ret=1 -if [[ "$ret" -eq 0 ]] ; then - echo 'OK' +if (( ret == 0 )); then + echo 'OK' fi exit $ret diff --git a/.github/workflows/fuzzer-indexing.yml b/.github/workflows/fuzzer-indexing.yml new file mode 100644 index 000000000..1d01a6ea5 --- /dev/null +++ b/.github/workflows/fuzzer-indexing.yml @@ -0,0 +1,24 @@ +name: Run the indexing fuzzer + +on: + push: + branches: + - main + +jobs: + fuzz: + name: Setup the action + runs-on: ubuntu-latest + timeout-minutes: 4320 # 72h + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + + # Run the fuzzer + - name: Run the fuzzer + run: | + cargo run --release --bin fuzz-indexing diff --git a/.github/workflows/publish-apt-brew-pkg.yml b/.github/workflows/publish-apt-brew-pkg.yml index e24d8ccf1..f7ab8666b 100644 --- a/.github/workflows/publish-apt-brew-pkg.yml +++ b/.github/workflows/publish-apt-brew-pkg.yml @@ -35,7 +35,7 @@ jobs: - name: Build deb package run: cargo deb -p meilisearch -o target/debian/meilisearch.deb - name: Upload debian pkg to release - uses: svenstaro/upload-release-action@2.5.0 + uses: svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/debian/meilisearch.deb diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 76558f3b1..c79176439 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -54,7 +54,7 @@ jobs: # No need to upload binaries for dry run (cron) - name: Upload binaries to release if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.5.0 + uses: 
svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/meilisearch @@ -87,7 +87,7 @@ jobs: # No need to upload binaries for dry run (cron) - name: Upload binaries to release if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.5.0 + uses: svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/${{ matrix.artifact_name }} @@ -121,7 +121,7 @@ jobs: - name: Upload the binary to release # No need to upload binaries for dry run (cron) if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.5.0 + uses: svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch @@ -183,7 +183,7 @@ jobs: - name: Upload the binary to release # No need to upload binaries for dry run (cron) if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.5.0 + uses: svenstaro/upload-release-action@2.6.1 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml index fa1f145da..9ceeaaaa4 100644 --- a/.github/workflows/publish-docker-images.yml +++ b/.github/workflows/publish-docker-images.yml @@ -58,13 +58,9 @@ jobs: - name: Set up QEMU uses: docker/setup-qemu-action@v2 - with: - platforms: linux/amd64,linux/arm64 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v2 - with: - platforms: linux/amd64,linux/arm64 - name: Login to Docker Hub uses: docker/login-action@v2 @@ -92,13 +88,10 @@ jobs: push: true platforms: linux/amd64,linux/arm64 tags: ${{ steps.meta.outputs.tags }} - builder: ${{ steps.buildx.outputs.name }} build-args: | COMMIT_SHA=${{ github.sha }} COMMIT_DATE=${{ steps.build-metadata.outputs.date }} GIT_TAG=${{ github.ref_name }} - cache-from: type=gha - cache-to: type=gha,mode=max # /!\ Don't touch this without checking with Cloud team - name: Send CI information to Cloud team diff --git a/.github/workflows/sdks-tests.yml b/.github/workflows/sdks-tests.yml index 617b446d1..78c45f1b2 100644 --- a/.github/workflows/sdks-tests.yml +++ b/.github/workflows/sdks-tests.yml @@ -3,6 +3,11 @@ name: SDKs tests on: workflow_dispatch: + inputs: + docker_image: + description: 'The Meilisearch Docker image used' + required: false + default: nightly schedule: - cron: "0 6 * * MON" # Every Monday at 6:00AM @@ -11,13 +16,28 @@ env: MEILI_NO_ANALYTICS: 'true' jobs: + define-docker-image: + runs-on: ubuntu-latest + outputs: + docker-image: ${{ steps.define-image.outputs.docker-image }} + steps: + - uses: actions/checkout@v3 + - name: Define the Docker image we need to use + id: define-image + run: | + event=${{ github.event_name }} + echo "docker-image=nightly" >> $GITHUB_OUTPUT + if [[ $event == 'workflow_dispatch' ]]; then + echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT + fi meilisearch-js-tests: + needs: define-docker-image name: JS SDK tests runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -47,11 +67,12 @@ jobs: run: yarn test:env:browser instant-meilisearch-tests: + needs: define-docker-image name: instant-meilisearch tests runs-on: ubuntu-latest services: meilisearch: - 
image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -73,11 +94,12 @@ jobs: run: yarn build meilisearch-php-tests: + needs: define-docker-image name: PHP SDK tests runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -103,11 +125,12 @@ jobs: composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle meilisearch-python-tests: + needs: define-docker-image name: Python SDK tests runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -127,11 +150,12 @@ jobs: run: pipenv run pytest meilisearch-go-tests: + needs: define-docker-image name: Go SDK tests runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -139,7 +163,7 @@ jobs: - '7700:7700' steps: - name: Set up Go - uses: actions/setup-go@v3 + uses: actions/setup-go@v4 with: go-version: stable - uses: actions/checkout@v3 @@ -156,11 +180,12 @@ jobs: run: go test -v ./... meilisearch-ruby-tests: + needs: define-docker-image name: Ruby SDK tests runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} @@ -180,11 +205,12 @@ jobs: run: bundle exec rspec meilisearch-rust-tests: + needs: define-docker-image name: Rust SDK tests runs-on: ubuntu-latest services: meilisearch: - image: getmeili/meilisearch:nightly + image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }} env: MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }} MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }} diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 5a64feda8..e363d36c8 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -43,7 +43,7 @@ jobs: toolchain: nightly override: true - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -65,7 +65,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -105,6 +105,29 @@ jobs: command: test args: --workspace --locked --release --all-features + test-disabled-tokenization: + name: Test disabled tokenization + runs-on: ubuntu-latest + container: + image: ubuntu:18.04 + if: github.event_name == 'schedule' + steps: + - uses: actions/checkout@v3 + - name: Install needed dependencies + run: | + apt-get update + apt-get install --assume-yes build-essential curl + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + 
override: true + - name: Run cargo tree without default features and check lindera is not present + run: | + cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz + - name: Run cargo tree with default features and check lindera is present + run: | + cargo tree -f '{p} {f}' -e normal | grep lindera -qz + # We run tests in debug also, to make sure that the debug_assertions are hit test-debug: name: Run tests in debug @@ -123,7 +146,7 @@ jobs: toolchain: stable override: true - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run tests in debug uses: actions-rs/cargo@v1 with: @@ -142,7 +165,7 @@ jobs: override: true components: clippy - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run cargo clippy uses: actions-rs/cargo@v1 with: @@ -161,7 +184,7 @@ jobs: override: true components: rustfmt - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.1 + uses: Swatinem/rust-cache@v2.4.0 - name: Run cargo fmt # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file. # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate diff --git a/Cargo.lock b/Cargo.lock index 5f192b6d1..46218fc34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,19 +4,19 @@ version = 3 [[package]] name = "actix-codec" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a7559404a7f3573127aab53c08ce37a6c6a315c374a31070f3c91cd1b4a7fe" +checksum = "617a8268e3537fe1d8c9ead925fca49ef6400927ee7bc26750e90ecee14ce4b8" dependencies = [ "bitflags", "bytes", "futures-core", "futures-sink", - "log", "memchr", "pin-project-lite", "tokio", "tokio-util", + "tracing", ] [[package]] @@ -46,7 +46,7 @@ dependencies = [ "actix-tls", "actix-utils", "ahash 0.8.3", - "base64 0.21.0", + "base64 0.21.2", "bitflags", "brotli", "bytes", @@ -110,9 +110,9 @@ dependencies = [ [[package]] name = "actix-server" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0da34f8e659ea1b077bb4637948b815cd3768ad5a188fdcd74ff4d84240cd824" +checksum = "3e8613a75dd50cc45f473cee3c34d59ed677c0f7b44480ce3b8247d7dc519327" dependencies = [ "actix-rt", "actix-service", @@ -150,7 +150,7 @@ dependencies = [ "futures-core", "log", "pin-project-lite", - "tokio-rustls", + "tokio-rustls 0.23.4", "tokio-util", "webpki-roots", ] @@ -247,14 +247,13 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "aes" -version = "0.7.5" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" +checksum = "433cfd6710c9986c576a25ca913c39d66a6474107b406f34f91d4a8923395241" dependencies = [ "cfg-if", "cipher", "cpufeatures", - "opaque-debug", ] [[package]] @@ -282,9 +281,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" dependencies = [ "memchr", ] @@ -312,53 +311,71 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.2.6" +version = "0.3.2" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "342258dd14006105c2b75ab1bd7543a03bdf0cfc94383303ac212a04939dff6f" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" dependencies = [ "anstyle", "anstyle-parse", + "anstyle-query", "anstyle-wincon", - "concolor-override", - "concolor-query", + "colorchoice", "is-terminal", "utf8parse", ] [[package]] name = "anstyle" -version = "0.3.5" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23ea9e81bd02e310c216d080f6223c179012256e5151c41db88d12c88a1684d2" +checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" [[package]] name = "anstyle-parse" -version = "0.1.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7d1bb534e9efed14f3e5f44e7dd1a4f709384023a4165199a4241e18dff0116" +checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" dependencies = [ "utf8parse", ] [[package]] -name = "anstyle-wincon" -version = "0.2.0" +name = "anstyle-query" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3127af6145b149f3287bb9a0d10ad9c5692dba8c53ad48285e5bec4063834fa" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" dependencies = [ "anstyle", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] name = "anyhow" -version = "1.0.70" +version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" +checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" dependencies = [ "backtrace", ] +[[package]] +name = "arbitrary" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -388,7 +405,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -399,7 +416,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -438,8 +455,8 @@ dependencies = [ "cc", "cfg-if", "libc", - "miniz_oxide", - "object 0.30.2", + "miniz_oxide 0.6.2", + "object", "rustc-demangle", ] @@ -451,15 +468,15 @@ checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" [[package]] name = "base64" -version = "0.21.0" +version = "0.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" [[package]] name = "base64ct" -version = "1.5.3" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" [[package]] name = "benchmarks" @@ -504,12 +521,6 @@ dependencies = [ 
"serde", ] -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - [[package]] name = "bitflags" version = "1.3.2" @@ -548,9 +559,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" +checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5" dependencies = [ "memchr", "once_cell", @@ -560,9 +571,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.11.1" +version = "3.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" +checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" [[package]] name = "byte-unit" @@ -597,7 +608,7 @@ checksum = "fdde5c9cd29ebd706ce1b35600920a33550e402fc998a2e53ad3b42c3c47a192" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -614,9 +625,9 @@ checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" [[package]] name = "bytestring" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7f83e57d9154148e355404702e2694463241880b939570d7c97c014da7a69a1" +checksum = "238e4886760d98c4f899360c834fa93e62cf7f721ac3c2da375cbdf4b8679aae" dependencies = [ "bytes", ] @@ -718,9 +729,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c137568cc60b904a7724001b35ce2630fd00d5d84805fbb608ab89509d788f" +checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" dependencies = [ "ciborium-io", "ciborium-ll", @@ -729,15 +740,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "346de753af073cc87b52b2083a506b38ac176a44cfb05497b622e27be899b369" +checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" [[package]] name = "ciborium-ll" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "213030a2b5a4e0c0892b6652260cf6ccac84827b83a85a534e178e3906c4cf1b" +checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" dependencies = [ "ciborium-io", "half", @@ -745,18 +756,19 @@ dependencies = [ [[package]] name = "cipher" -version = "0.3.0" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" +checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" dependencies = [ - "generic-array", + "crypto-common", + "inout", ] [[package]] name = "clap" -version = "3.2.23" +version = "3.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" dependencies = [ "bitflags", "clap_lex 0.2.4", @@ -766,9 +778,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.2.1" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"046ae530c528f252094e4a77886ee1374437744b2bff1497aa898bbddbbb29b3" +checksum = "93aae7a4192245f70fe75dd9157fc7b4a5bf53e88d30bd4396f7d8f9284d5acc" dependencies = [ "clap_builder", "clap_derive", @@ -777,27 +789,27 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.2.1" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "223163f58c9a40c3b0a43e1c4b50a9ce09f007ea2cb1ec258a687945b4b7929f" +checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990" dependencies = [ "anstream", "anstyle", "bitflags", - "clap_lex 0.4.1", + "clap_lex 0.5.0", "strsim", ] [[package]] name = "clap_derive" -version = "4.2.0" +version = "4.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" +checksum = "191d9573962933b4027f932c600cd252ce27a8ad5979418fe78e43c07996f27b" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -811,9 +823,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.4.1" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "concat-arrays" @@ -826,31 +844,16 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "concolor-override" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a855d4a1978dc52fb0536a04d384c2c0c1aa273597f08b77c8c4d3b2eec6037f" - -[[package]] -name = "concolor-query" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d11d52c3d7ca2e6d0040212be9e4dbbcd78b6447f535b6b561f449427944cf" -dependencies = [ - "windows-sys 0.45.0", -] - [[package]] name = "console" -version = "0.15.5" +version = "0.15.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3d79fbe8970a77e3e34151cc13d3b3e248aa0faaecb9f6091fa07ebefe5ad60" +checksum = "c926e00cc70edefdc64d3a5ff31cc65bb97a3460097762bd23afb4d8145fccf8" dependencies = [ "encode_unicode", "lazy_static", "libc", - "windows-sys 0.42.0", + "windows-sys 0.45.0", ] [[package]] @@ -905,9 +908,9 @@ checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173" [[package]] name = "cpufeatures" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280a9f2d8b3a38871a3c8a46fb80db65e5e5ed97da80c4d08bf27fb63e35e181" +checksum = "3e4c1eaa2012c47becbbad2ab175484c2a84d1185b566fb2cc5b8707343dfe58" dependencies = [ "libc", ] @@ -931,7 +934,7 @@ dependencies = [ "atty", "cast", "ciborium", - "clap 3.2.23", + "clap 3.2.25", "criterion-plot", "itertools", "lazy_static", @@ -1096,6 +1099,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "derive_arbitrary" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cdeb9ec472d588e539a818b2dee436825730da08ad0017c4b1a17676bdc8b7" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "derive_builder" version = "0.12.0" @@ -1177,9 +1191,9 @@ checksum = 
"8c1bba4f227a4a53d12b653f50ca7bf10c9119ae2aba56aff9e0338b5c98f36a" [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", @@ -1229,7 +1243,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] @@ -1331,22 +1345,22 @@ dependencies = [ [[package]] name = "enum-iterator" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "706d9e7cf1c7664859d79cd524e4e53ea2b67ea03c98cc2870c5e539695d597e" +checksum = "7add3873b5dd076766ee79c8e406ad1a472c385476b9e38849f8eec24f1be689" dependencies = [ "enum-iterator-derive", ] [[package]] name = "enum-iterator-derive" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "355f93763ef7b0ae1c43c4d8eccc9d5848d84ad1a1d8ce61c421d1ac85a19d05" +checksum = "eecf8589574ce9b895052fa12d69af7a233f99e6107f5cb8dd1044f2a17bfdcb" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.18", ] [[package]] @@ -1362,17 +1376,6 @@ dependencies = [ "termcolor", ] -[[package]] -name = "errno" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" -dependencies = [ - "errno-dragonfly", - "libc", - "winapi", -] - [[package]] name = "errno" version = "0.3.1" @@ -1433,19 +1436,19 @@ dependencies = [ "faux", "tempfile", "thiserror", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] name = "filetime" -version = "0.2.19" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e884668cd0c7480504233e951174ddc3b382f7c2666e3b7310b5c4e7b0c37f9" +checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153" dependencies = [ "cfg-if", "libc", "redox_syscall 0.2.16", - "windows-sys 0.42.0", + "windows-sys 0.48.0", ] [[package]] @@ -1465,13 +1468,12 @@ checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" [[package]] name = "flate2" -version = "1.0.25" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a2db397cb1c8772f31494cb8917e48cd1e64f0fa7efac59fbd741a0a8ce841" +checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743" dependencies = [ "crc32fast", - "libz-sys", - "miniz_oxide", + "miniz_oxide 0.7.1", ] [[package]] @@ -1559,7 +1561,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -1593,47 +1595,16 @@ dependencies = [ ] [[package]] -name = "fuzzcheck" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee76e8096c3fcd82ab23177edddcc9b81b72c123caab54bb1e2dc19fd09d2dec" +name = "fuzzers" +version = "1.2.0" dependencies = [ - "ahash 0.7.6", - "bit-vec", - "cc", - "cfg-if", + "arbitrary", + "clap 4.3.0", "fastrand", - "flate2", - "fuzzcheck_common", - "fuzzcheck_mutators_derive", - "getopts", - "libc", - "md5", - "nu-ansi-term", - "object 0.27.1", - "regex-syntax", + "milli", "serde", "serde_json", -] - -[[package]] -name = "fuzzcheck_common" -version = "0.12.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "dde06f8d25b14a35d43eb2d3dbace3b9193424460b10ad4ccf1b3d542d48f06f" -dependencies = [ - "getopts", -] - -[[package]] -name = "fuzzcheck_mutators_derive" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30ce01e8bbb3e7e0758dcf907fe799f5998a54368963f766ae94b84624ba60c8" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", + "tempfile", ] [[package]] @@ -1661,20 +1632,11 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad" -[[package]] -name = "getopts" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" -dependencies = [ - "unicode-width", -] - [[package]] name = "getrandom" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", @@ -1695,9 +1657,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.27.0" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec7af912d60cdbd3677c1af9352ebae6fb8394d165568a2234df0fa00f87793" +checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4" [[package]] name = "git2" @@ -1731,9 +1693,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.17" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66b91535aa35fea1523ad1b86cb6b53c28e0ae566ba4a460f4457e936cad7c6f" +checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782" dependencies = [ "bytes", "fnv", @@ -1794,7 +1756,7 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" version = "0.12.5" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e" dependencies = [ "byteorder", "heed-traits", @@ -1811,12 +1773,12 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.7.0" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e" [[package]] name = "heed-types" version = "0.7.2" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e" dependencies = [ "bincode", "heed-traits", @@ -1906,9 +1868,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hyper" -version = "0.14.25" +version = "0.14.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc5e554ff619822309ffd57d8734d77cd5ce6238bc956f037ea06c58238c9899" +checksum = "ab302d72a6f11a3b910431ff93aae7e773078c769f0a3ef15fb9ec692ed147d4" dependencies = [ "bytes", "futures-channel", @@ -1930,15 +1892,15 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.23.2" +version = "0.24.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" +checksum = "0646026eb1b3eea4cd9ba47912ea5ce9cc07713d105b1a14698f4e6433d348b7" dependencies = [ "http", "hyper", - "rustls", + "rustls 0.21.1", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.0", ] [[package]] @@ -1984,7 +1946,7 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] @@ -1998,6 +1960,15 @@ dependencies = [ "serde", ] +[[package]] +name = "inout" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" +dependencies = [ + "generic-array", +] + [[package]] name = "insta" version = "1.29.0" @@ -2025,9 +1996,9 @@ dependencies = [ [[package]] name = "io-lifetimes" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ "hermit-abi 0.3.1", "libc", @@ -2059,7 +2030,7 @@ checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ "hermit-abi 0.3.1", "io-lifetimes", - "rustix 0.37.11", + "rustix 0.37.19", "windows-sys 0.48.0", ] @@ -2104,9 +2075,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.60" +version = "0.3.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +checksum = "2f37a4a5928311ac501dee68b3c7613a1037d0edb30c8e5427bd832d55d1b790" dependencies = [ "wasm-bindgen", ] @@ -2125,7 +2096,7 @@ version = "8.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "pem", "ring", "serde", @@ -2165,9 +2136,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.141" +version = "0.2.144" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "2b00cc1c228a6782d0f076e7b232802e0c5689d41bb5df366f2a6b6621cfdfe1" [[package]] name = "libgit2-sys" @@ -2183,15 +2154,15 @@ dependencies = [ [[package]] name = "libm" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "libmimalloc-sys" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a558e3d911bc3c7bfc8c78bc580b404d6e51c1cefbf656e176a94b49b0df40" +checksum = "f4ac0e912c8ef1b735e92369695618dc5b1819f5a7bf3f167301a3ba1cea515e" dependencies = [ "cc", "libc", @@ -2199,9 +2170,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf" +checksum = "56ee889ecc9568871456d42f603d6a0ce59ff328d291063a45cbdf0036baf6db" dependencies = [ "cc", "libc", @@ -2440,9 +2411,9 @@ checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" [[package]] name = "linux-raw-sys" -version = 
"0.3.1" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "lmdb-rkv-sys" @@ -2484,12 +2455,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "518ef76f2f87365916b142844c16d8fefd85039bc5699050210a7778ee1cd1de" [[package]] name = "logging_timer" @@ -2515,14 +2483,14 @@ dependencies = [ [[package]] name = "manifest-dir-macros" -version = "0.1.16" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f08150cf2bab1fc47c2196f4f41173a27fcd0f684165e5458c0046b53a472e2f" +checksum = "450e5ef583bc05177c4975b9ea907047091a9f62e74e81fcafb99dbffac51e7e" dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.18", ] [[package]] @@ -2566,7 +2534,7 @@ dependencies = [ "byte-unit", "bytes", "cargo_toml", - "clap 4.2.1", + "clap 4.3.0", "crossbeam-channel", "deserr", "dump", @@ -2605,7 +2573,7 @@ dependencies = [ "rayon", "regex", "reqwest", - "rustls", + "rustls 0.20.8", "rustls-pemfile", "segment", "serde", @@ -2627,7 +2595,7 @@ dependencies = [ "tokio-stream", "toml", "urlencoding", - "uuid 1.3.1", + "uuid 1.3.3", "vergen", "walkdir", "yaup", @@ -2638,7 +2606,7 @@ dependencies = [ name = "meilisearch-auth" version = "1.2.0" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "enum-iterator", "hmac", "maplit", @@ -2650,7 +2618,7 @@ dependencies = [ "sha2", "thiserror", "time", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] @@ -2680,7 +2648,7 @@ dependencies = [ "thiserror", "time", "tokio", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] @@ -2725,7 +2693,6 @@ dependencies = [ "filter-parser", "flatten-serde-json", "fst", - "fuzzcheck", "fxhash", "geoutils", "grenad", @@ -2756,14 +2723,14 @@ dependencies = [ "tempfile", "thiserror", "time", - "uuid 1.3.1", + "uuid 1.3.3", ] [[package]] name = "mimalloc" -version = "0.1.36" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d88dad3f985ec267a3fcb7a1726f5cb1a7e8cad8b646e70a84f967210df23da" +checksum = "4e2894987a3459f3ffb755608bd82188f8ed00d0ae077f1edea29c068d639d98" dependencies = [ "libmimalloc-sys", ] @@ -2800,15 +2767,24 @@ dependencies = [ ] [[package]] -name = "mio" -version = "0.8.6" +name = "miniz_oxide" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "mio" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eebffdb73fe72e917997fad08bdbf31ac50b0fa91cec93e69a0662e4264d454c" dependencies = [ "libc", "log", "wasi", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -2839,23 +2815,13 @@ dependencies = [ [[package]] name = "ntapi" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc51db7b362b205941f71232e56c625156eb9a929f8cf74a428fd5bc094a4afc" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" dependencies = [ 
"winapi", ] -[[package]] -name = "nu-ansi-term" -version = "0.39.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e62e2187cbceeafee9fb7b5e5e182623e0628ebf430a479df4487beb8f92fd7a" -dependencies = [ - "overload", - "winapi", -] - [[package]] name = "num-bigint" version = "0.4.3" @@ -2899,18 +2865,9 @@ dependencies = [ [[package]] name = "object" -version = "0.27.1" +version = "0.30.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9" -dependencies = [ - "memchr", -] - -[[package]] -name = "object" -version = "0.30.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b8c786513eb403643f2a88c244c2aaa270ef2153f55094587d0c48a3cf22a83" +checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" dependencies = [ "memchr", ] @@ -2933,17 +2890,11 @@ version = "11.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" -[[package]] -name = "opaque-debug" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" - [[package]] name = "ordered-float" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13a384337e997e6860ffbaa83708b2ef329fd8c54cb67a5f64d421e0f943254f" +checksum = "2fc2dbde8f8a79f2102cc474ceb0ad68e3b80b85289ea62389b60e66777e4213" dependencies = [ "num-traits", ] @@ -2954,12 +2905,6 @@ version = "6.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - [[package]] name = "page_size" version = "0.4.2" @@ -3049,9 +2994,9 @@ dependencies = [ [[package]] name = "pem" -version = "1.1.0" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c64931a1a212348ec4f3b4362585eca7159d0d09cbdf4a7f74f02173596fd4" +checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8" dependencies = [ "base64 0.13.1", ] @@ -3072,9 +3017,9 @@ dependencies = [ [[package]] name = "pest" -version = "2.5.7" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b1403e8401ad5dedea73c626b99758535b342502f8d1e361f4a2dd952749122" +checksum = "e68e84bfb01f0507134eac1e9b410a12ba379d064eab48c50ba4ce329a527b70" dependencies = [ "thiserror", "ucd-trie", @@ -3082,9 +3027,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.5.7" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be99c4c1d2fc2769b1d00239431d711d08f6efedcecb8b6e30707160aee99c15" +checksum = "6b79d4c71c865a25a4322296122e3924d30bc8ee0834c8bfc8b95f7f054afbfb" dependencies = [ "pest", "pest_generator", @@ -3092,22 +3037,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.5.7" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e56094789873daa36164de2e822b3888c6ae4b4f9da555a1103587658c805b1e" +checksum = "6c435bf1076437b851ebc8edc3a18442796b30f1728ffea6262d59bbe28b077e" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", 
- "syn 2.0.14", + "syn 2.0.18", ] [[package]] name = "pest_meta" -version = "2.5.7" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6733073c7cff3d8459fda0e42f13a047870242aed8b509fe98000928975f359e" +checksum = "745a452f8eb71e39ffd8ee32b3c5f51d03845f99786fa9b68db6ff509c505411" dependencies = [ "once_cell", "pest", @@ -3172,9 +3117,9 @@ checksum = "3bd12336e3afa34152e002f57df37a7056778daa59ea542b3473b87f5fb260c4" [[package]] name = "pkg-config" -version = "0.3.26" +version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" [[package]] name = "platform-dirs" @@ -3245,9 +3190,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "6aeca18b86b413c660b781aa319e4e2648a3e6f9eadc9b47e9038e6fe9f3451b" dependencies = [ "unicode-ident", ] @@ -3262,7 +3207,7 @@ dependencies = [ "byteorder", "hex", "lazy_static", - "rustix 0.36.11", + "rustix 0.36.14", ] [[package]] @@ -3290,9 +3235,9 @@ checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" [[package]] name = "quote" -version = "1.0.26" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" +checksum = "1b9ab9c7eadfd8df19006f1cf1a4aed13540ed5cbc047010ece5826e10825488" dependencies = [ "proc-macro2", ] @@ -3380,9 +3325,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.7.3" +version = "1.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" +checksum = "81ca098a9821bd52d6b24fd8b10bd081f47d39c22778cafaa75a2857a62c6390" dependencies = [ "aho-corasick", "memchr", @@ -3397,17 +3342,17 @@ checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" [[package]] name = "regex-syntax" -version = "0.6.29" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" +checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" [[package]] name = "reqwest" -version = "0.11.16" +version = "0.11.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b71749df584b7f4cac2c426c127a7c785a5106cc98f7a8feb044115f0fa254" +checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", "bytes", "encoding_rs", "futures-core", @@ -3424,13 +3369,13 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", + "rustls 0.21.1", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "tokio", - "tokio-rustls", + "tokio-rustls 0.24.0", "tower-service", "url", "wasm-bindgen", @@ -3487,9 +3432,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.21" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" [[package]] name = "rustc_version" @@ -3502,12 +3447,12 @@ 
dependencies = [ [[package]] name = "rustix" -version = "0.36.11" +version = "0.36.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db4165c9963ab29e422d6c26fbc1d37f15bace6b2810221f9d925023480fcf0e" +checksum = "14e4d67015953998ad0eb82887a0eb0129e18a7e2f3b7b0f6c422fddcd503d62" dependencies = [ "bitflags", - "errno 0.2.8", + "errno", "io-lifetimes", "libc", "linux-raw-sys 0.1.4", @@ -3516,15 +3461,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.11" +version = "0.37.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85597d61f83914ddeba6a47b3b8ffe7365107221c2e557ed94426489fefb5f77" +checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" dependencies = [ "bitflags", - "errno 0.3.1", + "errno", "io-lifetimes", "libc", - "linux-raw-sys 0.3.1", + "linux-raw-sys 0.3.8", "windows-sys 0.48.0", ] @@ -3540,13 +3485,35 @@ dependencies = [ "webpki", ] +[[package]] +name = "rustls" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c911ba11bc8433e811ce56fde130ccf32f5127cab0e0194e9c68c5a5b671791e" +dependencies = [ + "log", + "ring", + "rustls-webpki", + "sct", +] + [[package]] name = "rustls-pemfile" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" dependencies = [ - "base64 0.21.0", + "base64 0.21.2", +] + +[[package]] +name = "rustls-webpki" +version = "0.100.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6207cd5ed3d8dca7816f8f3725513a34609c0c765bf652b8c3cb4cfd87db46b" +dependencies = [ + "ring", + "untrusted", ] [[package]] @@ -3608,9 +3575,9 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" [[package]] name = "serde" -version = "1.0.160" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" +checksum = "2113ab51b87a539ae008b5c6c02dc020ffa39afd2d83cffcb3f4eb2722cebec2" dependencies = [ "serde_derive", ] @@ -3626,20 +3593,20 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.160" +version = "1.0.163" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" +checksum = "8c805777e3930c8883389c602315a24224bcc738b63905ef87cd1420353ea93e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] name = "serde_json" -version = "1.0.95" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" dependencies = [ "indexmap", "itoa", @@ -3649,9 +3616,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0efd8caf556a6cebd3b285caf480045fcc1ac04f6bd786b09a6f11af30c4fcf4" +checksum = "93107647184f6027e3b7dcb2e11034cf95ffa1e3a682c67951963ac69c1c007d" dependencies = [ "serde", ] @@ -3745,9 +3712,9 @@ dependencies = [ [[package]] name = "slice-group-by" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" +checksum = 
"826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" [[package]] name = "smallstr" @@ -3832,9 +3799,9 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "subtle" -version = "2.4.1" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" [[package]] name = "syn" @@ -3849,9 +3816,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.14" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf316d5356ed6847742d036f8a39c3b8435cac10bd528a4bd461928a6ab34d5" +checksum = "32d41677bcbe24c20c52e7c70b0d8db04134c5d1066bf98662e2871ad200ea3e" dependencies = [ "proc-macro2", "quote", @@ -3907,9 +3874,9 @@ dependencies = [ [[package]] name = "temp-env" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee95b343d943e5a0d2221fb73029e8040f3c91d6d06afec86c664682a361681" +checksum = "9547444bfe52cbd79515c6c8087d8ae6ca8d64d2d31a27746320f5cb81d1a15c" dependencies = [ "parking_lot", ] @@ -3923,7 +3890,7 @@ dependencies = [ "cfg-if", "fastrand", "redox_syscall 0.3.5", - "rustix 0.37.11", + "rustix 0.37.19", "windows-sys 0.45.0", ] @@ -3959,14 +3926,14 @@ checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] name = "time" -version = "0.3.20" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890" +checksum = "8f3403384eaacbca9923fa06940178ac13e4edb725486d70e8e15881d0c836cc" dependencies = [ "itoa", "serde", @@ -3976,15 +3943,15 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" +checksum = "7300fbefb4dadc1af235a9cef3737cea692a9d97e1b9cbcd4ebdae6f8868e6fb" [[package]] name = "time-macros" -version = "0.2.8" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36" +checksum = "372950940a5f07bf38dbe211d7283c9e6d7327df53794992d293e534c733d09b" dependencies = [ "time-core", ] @@ -4016,9 +3983,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.27.0" +version = "1.28.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0de47a4eecbe11f498978a9b29d792f0d2692d1dd003650c24c76510e3bc001" +checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2" dependencies = [ "autocfg", "bytes", @@ -4030,18 +3997,18 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] name = "tokio-macros" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.14", + "syn 2.0.18", ] [[package]] @@ -4050,16 +4017,26 @@ version = "0.23.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" dependencies = [ - "rustls", + "rustls 0.20.8", "tokio", "webpki", ] [[package]] -name = "tokio-stream" -version = "0.1.12" +name = "tokio-rustls" +version = "0.24.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fb52b74f05dbf495a8fba459fdc331812b96aa086d9eb78101fa0d4569c3313" +checksum = "e0d409377ff5b1e3ca6437aa86c1eb7d40c134bfec254e44c830defa92669db5" +dependencies = [ + "rustls 0.21.1", + "tokio", +] + +[[package]] +name = "tokio-stream" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" dependencies = [ "futures-core", "pin-project-lite", @@ -4068,9 +4045,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.7" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2" +checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" dependencies = [ "bytes", "futures-core", @@ -4082,9 +4059,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b403acf6f2bb0859c93c7f0d967cb4a75a7ac552100f9322faf64dc047669b21" +checksum = "d6135d499e69981f9ff0ef2167955a5333c35e36f6937d382974566b3d5b94ec" dependencies = [ "serde", "serde_spanned", @@ -4094,18 +4071,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab8ed2edee10b50132aed5f331333428b011c99402b5a534154ed15746f9622" +checksum = "5a76a9312f5ba4c2dec6b9161fdf25d87ad8a09256ccea5a556fef03c706a10f" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.19.8" +version = "0.19.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "239410c8609e8125456927e6707163a3b1fdb40561e4b803bc041f466ccfdc13" +checksum = "2380d56e8670370eee6566b0bfd4265f65b3f432e8c6d85623f728d4fa31f739" dependencies = [ "indexmap", "serde", @@ -4134,9 +4111,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" +checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" dependencies = [ "once_cell", ] @@ -4176,15 +4153,15 @@ checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" [[package]] name = "unicode-blocks" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9de2be6bad6f56ce8373d377e611cbb2265de3a656138065609ce82e217aad70" +checksum = "943e3f1f50cc455d072e0801ccb71ff893b0c88060b1169f92e35fb5bb881cc6" [[package]] name = "unicode-ident" -version = "1.0.8" +version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +checksum = "b15811caf2415fb889178633e7724bad2509101cde276048e013b9def5e51fa0" [[package]] name = "unicode-normalization" @@ -4201,12 +4178,6 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - [[package]] name = "unicode-xid" version = "0.2.4" @@ -4259,9 +4230,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.3.1" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b55a3fef2a1e3b3a00ce878640918820d3c51081576ac657d23af9fc7928fdb" +checksum = "345444e32442451b267fc254ae85a209c64be56d2890e601a0c37ff0c3c5ecd2" dependencies = [ "getrandom", "serde", @@ -4334,9 +4305,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "5bba0e8cb82ba49ff4e229459ff22a191bbe9a1cb3a341610c9c33efc27ddf73" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -4344,24 +4315,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = "19b04bc93f9d6bdee709f6bd2118f57dd6679cf1176a1af464fca3ab0d66d8fb" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.18", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.33" +version = "0.4.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" +checksum = "2d1985d03709c53167ce907ff394f5316aa22cb4e12761295c5dc57dacb6297e" dependencies = [ "cfg-if", "js-sys", @@ -4371,9 +4342,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = "14d6b024f1a526bb0234f52840389927257beb670610081360e5a03c5df9c258" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4381,28 +4352,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.18", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "ed9d5b4305409d1fc9482fee2d7f9bcbf24b3972bf59817ef757e23982242a93" [[package]] name = "web-sys" -version = "0.3.60" +version = "0.3.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" +checksum = "3bdd9ef4e984da1187bf8110c5cf5b845fbc87a23602cdf912386a76fcd3a7c2" dependencies = [ "js-sys", "wasm-bindgen", @@ -4468,21 +4439,6 @@ version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-sys" -version = "0.42.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-sys" version = "0.45.0" @@ -4617,9 +4573,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "winnow" -version = "0.4.1" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae8970b36c66498d8ff1d66685dc86b91b29db0c7739899012f63a63814b4b28" +checksum = "61de7bac303dc551fe038e2b3cef0f571087a47571ea6e79a87692ac99b99699" dependencies = [ "memchr", ] @@ -4690,9 +4646,9 @@ dependencies = [ [[package]] name = "zip" -version = "0.6.4" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0445d0fbc924bb93539b4316c11afb121ea39296f99a3c4c9edad09e3658cdef" +checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" dependencies = [ "aes", "byteorder", @@ -4723,7 +4679,7 @@ version = "0.12.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ - "zstd-safe 6.0.4+zstd.1.5.4", + "zstd-safe 6.0.5+zstd.1.5.4", ] [[package]] @@ -4738,9 +4694,9 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "6.0.4+zstd.1.5.4" +version = "6.0.5+zstd.1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7afb4b54b8910cf5447638cb54bf4e8a65cbedd783af98b98c62ffe91f185543" +checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" dependencies = [ "libc", "zstd-sys", @@ -4748,9 +4704,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.7+zstd.1.5.4" +version = "2.0.8+zstd.1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" dependencies = [ "cc", "libc", diff --git a/Cargo.toml b/Cargo.toml index b69831b9c..f7e5758d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,8 @@ members = [ "filter-parser", "flatten-serde-json", "json-depth-checker", - "benchmarks" + "benchmarks", + "fuzzers", ] [workspace.package] diff --git a/Dockerfile b/Dockerfile index 0d7593f79..70950f338 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,3 @@ -# syntax=docker/dockerfile:1.4 # Compile FROM rust:alpine3.16 AS compiler @@ -12,7 +11,7 @@ ARG GIT_TAG ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG} ENV RUSTFLAGS="-C target-feature=-crt-static" -COPY --link . . +COPY . . RUN set -eux; \ apkArch="$(apk --print-arch)"; \ if [ "$apkArch" = "aarch64" ]; then \ @@ -31,7 +30,7 @@ RUN apk update --quiet \ # add meilisearch to the `/bin` so you can run it from anywhere and it's easy # to find. 
-COPY --from=compiler --link /meilisearch/target/release/meilisearch /bin/meilisearch +COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch # To stay compatible with the older version of the container (pre v0.27.0) we're # going to symlink the meilisearch binary in the path to `/meilisearch` RUN ln -s /bin/meilisearch /meilisearch diff --git a/assets/grafana-dashboard.json b/assets/grafana-dashboard.json new file mode 100644 index 000000000..d00069293 --- /dev/null +++ b/assets/grafana-dashboard.json @@ -0,0 +1,1376 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 14, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "refId": "A" + } + ], + "title": "Web application metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "purple", + "mode": "fixed" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 22, + "interval": "5s", + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "meilisearch_index_count{job=\"meilisearch\", instance=\"$instance\"}", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Indexes Count", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 18, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "expr": "meilisearch_index_docs_count{job=\"meilisearch\", index=\"$Index\", instance=\"$instance\"}", + "hide": false, + "range": true, + "refId": "A" + } + ], + "title": "Total Documents", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + 
"value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 19, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total Searches (1h)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 20, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total Searches (24h)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 21, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "round(increase(meilisearch_http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total Searches (30d)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + 
}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 2, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}", + "interval": "", + "legendFormat": "Database size on disk", + "range": true, + "refId": "DB Size on disk" + }, + { + "datasource": { + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" + }, + "editorMode": "builder", + "expr": "meilisearch_used_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}", + "hide": false, + "legendFormat": "Used bytes", + "range": true, + "refId": "Actual used bytes" + } + ], + "title": "Database Size in bytes", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 3, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "rate(http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(http_response_time_seconds_count[5m])", + "interval": "", + "legendFormat": "{{method}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Mean response time (All Indexes)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "c4085c47-f6d3-45dd-b761-6809055bb749" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + 
"mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 1, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "rate(meilisearch_http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])", + "interval": "", + "legendFormat": "{{method}} {{path}}", + "range": true, + "refId": "A" + } + ], + "title": "HTTP requests per second (All Indexes)", + "type": "timeseries" + }, + { + "cards": {}, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "tsbuckets", + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 18 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 16, + "legend": { + "show": false + }, + "options": { + "calculate": false, + "calculation": {}, + "cellGap": 2, + "cellValues": {}, + "color": { + "exponent": 0.4, + "fill": "#b4ff00", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Blues", + "steps": 128 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": false + }, + "rowsFrame": { + "layout": "auto" + }, + "showValue": "never", + "tooltip": { + "show": true, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "decimals": 0, + "reverse": false, + "unit": "s" + } + }, + "pluginVersion": "9.5.2", + "reverseYBuckets": false, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "sum by(le) (increase(http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))", + "format": "heatmap", + "interval": "", + "legendFormat": "{{le}}", + "range": true, + "refId": "A" + } + ], + "title": "Response time distribution over time (`POST /indexes/:index/search`)", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": 10, + "yAxis": { + "decimals": 2, + "format": "s", + "logBase": 1, + "show": true + }, + "yBucketBound": "auto", + "yBucketNumber": 10 + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": 
"none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 23, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"statuses\"}", + "interval": "", + "legendFormat": "{{value}} ", + "range": true, + "refId": "A" + } + ], + "title": "Number of tasks by statuses", + "type": "timeseries" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 24, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"types\"}", + "interval": "", + "legendFormat": "{{value}} ", + "range": true, + "refId": "A" + } + ], + "title": "Number of tasks by types", + "type": "timeseries" + }, + { + "datasource": {}, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 25, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + 
"tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.4", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "meilisearch_nb_tasks{instance=\"$instance\", job=\"meilisearch\", kind=\"indexes\"}", + "interval": "", + "legendFormat": "{{value}} ", + "range": true, + "refId": "A" + } + ], + "title": "Number of tasks by indexes", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "i51CxikVz" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 51 + }, + "id": 12, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "i51CxikVz" + }, + "refId": "A" + } + ], + "title": "System metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 4, + "interval": "5s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "rate(process_cpu_seconds_total{job=\"meilisearch\", instance=\"$instance\"}[1m])", + "interval": "", + "legendFormat": "process", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus" + }, + "exemplar": true, + "expr": "sum(rate(container_cpu_usage_seconds_total{name='mongodb-redis'}[1m])) by (name)", + "interval": "", + "legendFormat": "container", + "refId": "B" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "continuous-YlBl" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "MiB", + "axisPlacement": "left", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 15, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 5, + "interval": "5s", + "options": { + 
"legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "process_resident_memory_bytes{job=\"meilisearch\", instance=\"$instance\"} / 1024 / 1024", + "interval": "", + "legendFormat": "process", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus" + }, + "exemplar": true, + "expr": "container_memory_usage_bytes{name=\"mongodb-redis\"} / 1024 / 1024", + "interval": "", + "legendFormat": "container", + "refId": "B" + } + ], + "title": "Memory usage", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "localhost:7700", + "value": "localhost:7700" + }, + "datasource": { + "type": "prometheus", + "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" + }, + "definition": "label_values(instance)", + "hide": 0, + "includeAll": false, + "label": "Instance", + "multi": false, + "name": "instance", + "options": [], + "query": { + "query": "label_values(instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "mieli", + "value": "mieli" + }, + "datasource": { + "type": "prometheus", + "uid": "bb3298a4-9acf-4da1-b86a-813f29f50888" + }, + "definition": "label_values(index)", + "hide": 0, + "includeAll": false, + "label": "index", + "multi": false, + "name": "Index", + "options": [], + "query": { + "query": "label_values(index)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m" + ] + }, + "timezone": "", + "title": "Meilisearch", + "uid": "7wcZ94dnz", + "version": 6, + "weekStart": "" +} \ No newline at end of file diff --git a/assets/prometheus-basic-scraper.yml b/assets/prometheus-basic-scraper.yml new file mode 100644 index 000000000..762f0ad43 --- /dev/null +++ b/assets/prometheus-basic-scraper.yml @@ -0,0 +1,19 @@ +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'codelab-monitor' + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: 'meilisearch' + + # Override the global default and scrape targets from this job every 5 seconds. + scrape_interval: 5s + + static_configs: + - targets: ['localhost:7700'] \ No newline at end of file diff --git a/config.toml b/config.toml index 71087f25f..c47989f56 100644 --- a/config.toml +++ b/config.toml @@ -1,128 +1,131 @@ # This file shows the default configuration of Meilisearch. # All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables -db_path = "./data.ms" # Designates the location where database files will be created and retrieved. 
# https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path +db_path = "./data.ms" -env = "development" # Configures the instance's environment. Value must be either `production` or `development`. # https://www.meilisearch.com/docs/learn/configuration/instance_options#environment +env = "development" -http_addr = "localhost:7700" # The address on which the HTTP server will listen. +http_addr = "localhost:7700" -# master_key = "YOUR_MASTER_KEY_VALUE" # Sets the instance's master key, automatically protecting all routes except GET /health. # https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key +# master_key = "YOUR_MASTER_KEY_VALUE" -# no_analytics = true # Deactivates Meilisearch's built-in telemetry when provided. # Meilisearch automatically collects data from all instances that do not opt out using this flag. # All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time. # https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics +# no_analytics = true -http_payload_size_limit = "100 MB" # Sets the maximum size of accepted payloads. # https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size +http_payload_size_limit = "100 MB" -log_level = "INFO" # Defines how much detail should be present in Meilisearch's logs. # Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE` # https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level +log_level = "INFO" -# max_indexing_memory = "2 GiB" # Sets the maximum amount of RAM Meilisearch can use when indexing. # https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory +# max_indexing_memory = "2 GiB" -# max_indexing_threads = 4 # Sets the maximum number of threads Meilisearch can use during indexing. # https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads +# max_indexing_threads = 4 ############# ### DUMPS ### ############# -dump_dir = "dumps/" # Sets the directory where Meilisearch will create dump files. # https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory +dump_dir = "dumps/" -# import_dump = "./path/to/my/file.dump" # Imports the dump file located at the specified path. Path must point to a .dump file. # https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump +# import_dump = "./path/to/my/file.dump" -ignore_missing_dump = false # Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump +ignore_missing_dump = false -ignore_dump_if_db_exists = false # Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists +ignore_dump_if_db_exists = false ################# ### SNAPSHOTS ### ################# -schedule_snapshot = false # Enables scheduled snapshots when true, disabled when false (the default). # If the value is given as an integer, scheduled snapshots are enabled, with the passed value as the interval # between each snapshot, in seconds.
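# For example, `schedule_snapshot = 3600` would create a snapshot every hour.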
# https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation +schedule_snapshot = false -snapshot_dir = "snapshots/" # Sets the directory where Meilisearch will store snapshots. # https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination +snapshot_dir = "snapshots/" -# import_snapshot = "./path/to/my/snapshot" # Launches Meilisearch after importing a previously-generated snapshot at the given filepath. # https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot +# import_snapshot = "./path/to/my/snapshot" -ignore_missing_snapshot = false # Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot +ignore_missing_snapshot = false -ignore_snapshot_if_db_exists = false # Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists +ignore_snapshot_if_db_exists = false ########### ### SSL ### ########### -# ssl_auth_path = "./path/to/root" # Enables client authentication in the specified path. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path +# ssl_auth_path = "./path/to/root" -# ssl_cert_path = "./path/to/certfile" # Sets the server's SSL certificates. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path +# ssl_cert_path = "./path/to/certfile" -# ssl_key_path = "./path/to/private-key" # Sets the server's SSL key files. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path +# ssl_key_path = "./path/to/private-key" -# ssl_ocsp_path = "./path/to/ocsp-file" # Sets the server's OCSP file. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path +# ssl_ocsp_path = "./path/to/ocsp-file" -ssl_require_auth = false # Makes SSL authentication mandatory. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth +ssl_require_auth = false -ssl_resumption = false # Activates SSL session resumption. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption +ssl_resumption = false -ssl_tickets = false # Activates SSL tickets. # https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets +ssl_tickets = false ############################# ### Experimental features ### ############################# -experimental_enable_metrics = false # Experimental metrics feature. For more information, see: # Enables the Prometheus metrics on the `GET /metrics` endpoint. 
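# Once enabled, the endpoint can be checked manually with e.g. `curl http://localhost:7700/metrics` (assuming the default `http_addr` above).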
+experimental_enable_metrics = false + +# Experimental RAM reduction during indexing, do not use in production, see: +experimental_reduce_indexing_memory_usage = false diff --git a/fuzzers/Cargo.toml b/fuzzers/Cargo.toml new file mode 100644 index 000000000..cbc27a55c --- /dev/null +++ b/fuzzers/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "fuzzers" +publish = false + +version.workspace = true +authors.workspace = true +description.workspace = true +homepage.workspace = true +readme.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +arbitrary = { version = "1.3.0", features = ["derive"] } +clap = { version = "4.3.0", features = ["derive"] } +fastrand = "1.9.0" +milli = { path = "../milli" } +serde = { version = "1.0.160", features = ["derive"] } +serde_json = { version = "1.0.95", features = ["preserve_order"] } +tempfile = "3.5.0" diff --git a/fuzzers/README.md b/fuzzers/README.md new file mode 100644 index 000000000..d9d02de0e --- /dev/null +++ b/fuzzers/README.md @@ -0,0 +1,3 @@ +# Fuzzers + +The purpose of this crate is to contain all the handmade "fuzzers" we may need. diff --git a/fuzzers/src/bin/fuzz-indexing.rs b/fuzzers/src/bin/fuzz-indexing.rs new file mode 100644 index 000000000..1d53e069c --- /dev/null +++ b/fuzzers/src/bin/fuzz-indexing.rs @@ -0,0 +1,152 @@ +use std::num::NonZeroUsize; +use std::path::PathBuf; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::time::Duration; + +use arbitrary::{Arbitrary, Unstructured}; +use clap::Parser; +use fuzzers::Operation; +use milli::heed::EnvOpenOptions; +use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig}; +use milli::Index; +use tempfile::TempDir; + +#[derive(Debug, Arbitrary)] +struct Batch([Operation; 5]); + +#[derive(Debug, Clone, Parser)] +struct Opt { + /// The number of fuzzers to run in parallel. + #[clap(long)] + par: Option<NonZeroUsize>, + // We need to put a lot of newlines in the following documentation or else everything gets collapsed on one line + /// The path in which the databases will be created. + /// Using a ramdisk is recommended.
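+ /// (Each worker thread creates its own temporary index under this path, so the directory sees heavy write traffic.)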
+ /// + /// Linux: + /// + /// sudo mount -t tmpfs -o size=2g tmpfs ramdisk # to create it + /// + /// sudo umount ramdisk # to remove it + /// + /// macOS: + /// + /// diskutil erasevolume HFS+ 'RAM Disk' `hdiutil attach -nobrowse -nomount ram://4194304` # to create it + /// + /// hdiutil detach /dev/:the_disk + #[clap(long)] + path: Option<PathBuf>, +} + +fn main() { + let opt = Opt::parse(); + let progression: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0))); + let stop: &'static AtomicBool = Box::leak(Box::new(AtomicBool::new(false))); + + let par = opt.par.unwrap_or_else(|| std::thread::available_parallelism().unwrap()).get(); + let mut handles = Vec::with_capacity(par); + + for _ in 0..par { + let opt = opt.clone(); + + let handle = std::thread::spawn(move || { + let mut options = EnvOpenOptions::new(); + options.map_size(1024 * 1024 * 1024 * 1024); + let tempdir = match opt.path { + Some(path) => TempDir::new_in(path).unwrap(), + None => TempDir::new().unwrap(), + }; + let index = Index::new(options, tempdir.path()).unwrap(); + let indexer_config = IndexerConfig::default(); + let index_documents_config = IndexDocumentsConfig::default(); + + std::thread::scope(|s| { + loop { + if stop.load(Ordering::Relaxed) { + return; + } + let v: Vec<u8> = + std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect(); + + let mut data = Unstructured::new(&v); + let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap(); + // will be used to display the error once a thread crashes + let dbg_input = format!("{:#?}", batches); + + let handle = s.spawn(|| { + let mut wtxn = index.write_txn().unwrap(); + + for batch in batches { + let mut builder = IndexDocuments::new( + &mut wtxn, + &index, + &indexer_config, + index_documents_config.clone(), + |_| (), + || false, + ) + .unwrap(); + + for op in batch.0 { + match op { + Operation::AddDoc(doc) => { + let documents = + milli::documents::objects_from_json_value(doc.to_d()); + let documents = + milli::documents::documents_batch_reader_from_objects( + documents, + ); + let (b, _added) = builder.add_documents(documents).unwrap(); + builder = b; + } + Operation::DeleteDoc(id) => { + let (b, _removed) = + builder.remove_documents(vec![id.to_s()]).unwrap(); + builder = b; + } + } + } + builder.execute().unwrap(); + + // after executing a batch we check if the database is corrupted + let res = index.search(&wtxn).execute().unwrap(); + index.documents(&wtxn, res.documents_ids).unwrap(); + progression.fetch_add(1, Ordering::Relaxed); + } + wtxn.abort().unwrap(); + }); + if let err @ Err(_) = handle.join() { + stop.store(true, Ordering::Relaxed); + err.expect(&dbg_input); + } + } + }); + }); + handles.push(handle); + } + + std::thread::spawn(|| { + let mut last_value = 0; + let start = std::time::Instant::now(); + loop { + let total = progression.load(Ordering::Relaxed); + let elapsed = start.elapsed().as_secs(); + if elapsed > 3600 { + // after 1 hour, stop the fuzzer, success + std::process::exit(0); + } + println!( + "Has been running for {:?} seconds. 
Tested {} new values for a total of {}.", + elapsed, + total - last_value, + total + ); + last_value = total; + std::thread::sleep(Duration::from_secs(1)); + } + }); + + for handle in handles { + handle.join().unwrap(); + } +} diff --git a/fuzzers/src/lib.rs b/fuzzers/src/lib.rs new file mode 100644 index 000000000..c0eef38e8 --- /dev/null +++ b/fuzzers/src/lib.rs @@ -0,0 +1,46 @@ +use arbitrary::Arbitrary; +use serde_json::{json, Value}; + +#[derive(Debug, Arbitrary)] +pub enum Document { + One, + Two, + Three, + Four, + Five, + Six, +} + +impl Document { + pub fn to_d(&self) -> Value { + match self { + Document::One => json!({ "id": 0, "doggo": "bernese" }), + Document::Two => json!({ "id": 0, "doggo": "golden" }), + Document::Three => json!({ "id": 0, "catto": "jorts" }), + Document::Four => json!({ "id": 1, "doggo": "bernese" }), + Document::Five => json!({ "id": 1, "doggo": "golden" }), + Document::Six => json!({ "id": 1, "catto": "jorts" }), + } + } +} + +#[derive(Debug, Arbitrary)] +pub enum DocId { + Zero, + One, +} + +impl DocId { + pub fn to_s(&self) -> String { + match self { + DocId::Zero => "0".to_string(), + DocId::One => "1".to_string(), + } + } +} + +#[derive(Debug, Arbitrary)] +pub enum Operation { + AddDoc(Document), + DeleteDoc(DocId), +} diff --git a/grafana-dashboards/dashboard.json b/grafana-dashboards/dashboard.json deleted file mode 100644 index 53865ad56..000000000 --- a/grafana-dashboards/dashboard.json +++ /dev/null @@ -1,1007 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 1, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "datasource": { - "type": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 14, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "i51CxikVz" - }, - "refId": "A" - } - ], - "title": "Web application metrics", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 0, - "y": 1 - }, - "id": 2, - "interval": "5s", - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "meilisearch_db_size_bytes{job=\"meilisearch\", instance=\"$instance\"}", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Database Size", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "purple", - "mode": "fixed" - }, - "decimals": 0, - 
"mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 4, - "y": 1 - }, - "id": 22, - "interval": "5s", - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "meilisearch_index_count{job=\"meilisearch\", instance=\"$instance\"}", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Indexes Count", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 8, - "y": 1 - }, - "id": 18, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "expr": "meilisearch_index_docs_count{job=\"meilisearch\", index=\"$Index\", instance=\"$instance\"}", - "hide": false, - "range": true, - "refId": "A" - } - ], - "title": "Total Documents", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 12, - "y": 1 - }, - "id": 19, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[1h]))", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Total Searches (1h)", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 16, - "y": 1 - }, - "id": 20, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - 
"editorMode": "builder", - "exemplar": true, - "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[24h]))", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Total Searches (24h)", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 4, - "x": 20, - "y": 1 - }, - "id": 21, - "options": { - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true, - "text": {} - }, - "pluginVersion": "9.0.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "round(increase(http_requests_total{method=\"POST\", path=\"/indexes/$Index/search\", job=\"meilisearch\"}[30d]))", - "interval": "", - "legendFormat": "", - "range": true, - "refId": "A" - } - ], - "title": "Total Searches (30d)", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 15, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 7 - }, - "id": 1, - "interval": "5s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "rate(http_requests_total{instance=\"$instance\", job=\"meilisearch\"}[5m])", - "interval": "", - "legendFormat": "{{method}} {{path}}", - "range": true, - "refId": "A" - } - ], - "title": "HTTP requests per second (All Indexes)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 15, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 7 - }, - "id": 3, - "interval": "5s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.1.4", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "rate(http_response_time_seconds_sum{instance=\"$instance\", job=\"meilisearch\"}[5m]) / rate(http_response_time_seconds_count[5m])", - "interval": "", - "legendFormat": "{{method}} {{path}}", - "range": true, - "refId": "A" - } - ], - "title": "Mean response time (All Indexes)", - "type": "timeseries" - }, - { - "cards": {}, - "color": { - "cardColor": "#b4ff00", - "colorScale": "sqrt", - "colorScheme": "interpolateBlues", - "exponent": 0.5, - "mode": "spectrum" - }, - "dataFormat": "tsbuckets", - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "gridPos": { - "h": 12, - "w": 24, - "x": 0, - "y": 18 - }, - "heatmap": {}, - "hideZeroBuckets": false, - "highlightCards": true, - "id": 16, - "legend": { - "show": false - }, - "pluginVersion": "8.1.4", - "reverseYBuckets": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "sum by(le) (increase(http_response_time_seconds_bucket{path=\"/indexes/$Index/search\", instance=\"$instance\", job=\"meilisearch\"}[30s]))", - "format": "heatmap", - "interval": "", - "legendFormat": "{{le}}", - "range": true, - "refId": "A" - } - ], - "title": "Response time distribution over time (`POST /indexes/:index/search`)", - "tooltip": { - "show": true, - "showHistogram": false - }, - "type": "heatmap", - "xAxis": { - "show": true - }, - "xBucketNumber": 10, - "yAxis": { - "decimals": 2, - "format": "s", - "logBase": 1, - "show": true - }, - "yBucketBound": "auto", - "yBucketNumber": 10 - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "i51CxikVz" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 12, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "i51CxikVz" - }, - "refId": "A" - } - ], - "title": "System metrics", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 15, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 0, - "y": 31 - }, - "id": 4, - "interval": "5s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": 
"single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "rate(process_cpu_seconds_total{job=\"meilisearch\", instance=\"$instance\"}[1m])", - "interval": "", - "legendFormat": "process", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "exemplar": true, - "expr": "sum(rate(container_cpu_usage_seconds_total{name='mongodb-redis'}[1m])) by (name)", - "interval": "", - "legendFormat": "container", - "refId": "B" - } - ], - "title": "CPU usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "continuous-YlBl" - }, - "custom": { - "axisLabel": "MiB", - "axisPlacement": "left", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 15, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 11, - "w": 12, - "x": 12, - "y": 31 - }, - "id": 5, - "interval": "5s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "editorMode": "builder", - "exemplar": true, - "expr": "process_resident_memory_bytes{job=\"meilisearch\", instance=\"$instance\"} / 1024 / 1024", - "interval": "", - "legendFormat": "process", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "exemplar": true, - "expr": "container_memory_usage_bytes{name=\"mongodb-redis\"} / 1024 / 1024", - "interval": "", - "legendFormat": "container", - "refId": "B" - } - ], - "title": "Memory usage", - "type": "timeseries" - } - ], - "refresh": "5s", - "schemaVersion": 36, - "style": "dark", - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "localhost:7700", - "value": "localhost:7700" - }, - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "definition": "label_values(instance)", - "hide": 0, - "includeAll": false, - "label": "Instance", - "multi": false, - "name": "instance", - "options": [], - "query": { - "query": "label_values(instance)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "current": { - "selected": false, - "text": "movie-collection", - "value": "movie-collection" - }, - "datasource": { - "type": "prometheus", - "uid": "1MRsknzVz" - }, - "definition": "label_values(index)", - "hide": 0, - "includeAll": false, - "label": "index", - "multi": false, - "name": "Index", - "options": [], - "query": { - "query": "label_values(index)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - } - ] - }, - "time": { - "from": "now-15m", - "to": "now" - }, - 
"timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m" - ] - }, - "timezone": "", - "title": "Meilisearch", - "uid": "7wcZ94dnz", - "version": 47, - "weekStart": "" - } \ No newline at end of file diff --git a/index-scheduler/src/autobatcher.rs b/index-scheduler/src/autobatcher.rs index d738cc5e4..096bcce14 100644 --- a/index-scheduler/src/autobatcher.rs +++ b/index-scheduler/src/autobatcher.rs @@ -321,9 +321,18 @@ impl BatchKind { }) } ( - this @ BatchKind::DocumentOperation { .. }, + BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids }, K::DocumentDeletion, - ) => Break(this), + ) => { + operation_ids.push(id); + + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids, + }) + } // but we can't autobatch documents if it's not the same kind // this match branch MUST be AFTER the previous one ( @@ -346,7 +355,35 @@ impl BatchKind { deletion_ids.push(id); Continue(BatchKind::DocumentClear { ids: deletion_ids }) } - // we can't autobatch a deletion and an import + // we can autobatch the deletion and import if the index already exists + ( + BatchKind::DocumentDeletion { mut deletion_ids }, + K::DocumentImport { method, allow_index_creation, primary_key } + ) if index_already_exists => { + deletion_ids.push(id); + + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids: deletion_ids, + }) + } + // we can autobatch the deletion and import if both can't create an index + ( + BatchKind::DocumentDeletion { mut deletion_ids }, + K::DocumentImport { method, allow_index_creation, primary_key } + ) if !allow_index_creation => { + deletion_ids.push(id); + + Continue(BatchKind::DocumentOperation { + method, + allow_index_creation, + primary_key, + operation_ids: deletion_ids, + }) + } + // we can't autobatch a deletion and an import if the index does not exists but would be created by an addition ( this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. 
} @@ -648,36 +685,36 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); - // We can't autobatch document addition with document deletion - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, 
true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - // we also can't do the only way around - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + // We can autobatch document addition with document deletion + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + 
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + // And the other way around + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); + 
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); } #[test] diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index c88234809..67f70d367 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -24,6 +24,7 @@ use std::io::BufWriter; use dump::IndexMetadata; use log::{debug, error, info}; +use meilisearch_types::error::Code; use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; use meilisearch_types::milli::heed::CompactionOption; @@ -1491,7 +1492,12 @@ fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result Ok(if let Some(filter) = filter { let mut wtxn = index.write_txn()?; - let candidates = filter.evaluate(&wtxn, &index)?; + let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err { + 
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { + Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter) + } + e => e.into(), + })?; let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?; delete_operation.delete_documents(&candidates); let deleted_documents = diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs index 3a19ed4d2..acab850d1 100644 --- a/index-scheduler/src/error.rs +++ b/index-scheduler/src/error.rs @@ -46,6 +46,8 @@ impl From<DateField> for Code { #[allow(clippy::large_enum_variant)] #[derive(Error, Debug)] pub enum Error { + #[error("{1}")] + WithCustomErrorCode(Code, Box<Self>), #[error("Index `{0}` not found.")] IndexNotFound(String), #[error("Index `{0}` already exists.")] @@ -144,6 +146,7 @@ impl Error { pub fn is_recoverable(&self) -> bool { match self { Error::IndexNotFound(_) + | Error::WithCustomErrorCode(_, _) | Error::IndexAlreadyExists(_) | Error::SwapDuplicateIndexFound(_) | Error::SwapDuplicateIndexesFound(_) @@ -176,11 +179,16 @@ impl Error { Error::PlannedFailure => false, } } + + pub fn with_custom_error_code(self, code: Code) -> Self { + Self::WithCustomErrorCode(code, Box::new(self)) + } } impl ErrorCode for Error { fn error_code(&self) -> Code { match self { + Error::WithCustomErrorCode(code, _) => *code, Error::IndexNotFound(_) => Code::IndexNotFound, Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists, Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound, diff --git a/index-scheduler/src/index_mapper/index_map.rs b/index-scheduler/src/index_mapper/index_map.rs index d140d4944..9bed4fe5d 100644 --- a/index-scheduler/src/index_mapper/index_map.rs +++ b/index-scheduler/src/index_mapper/index_map.rs @@ -5,6 +5,7 @@ use std::collections::BTreeMap; use std::path::Path; use std::time::Duration; +use meilisearch_types::heed::flags::Flags; use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions}; use meilisearch_types::milli::Index; use time::OffsetDateTime; @@ -53,6 +54,7 @@ pub struct IndexMap { pub struct ClosingIndex { uuid: Uuid, closing_event: EnvClosingEvent, + enable_mdb_writemap: bool, map_size: usize, generation: usize, } @@ -68,6 +70,7 @@ impl ClosingIndex { pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> { self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex { uuid: self.uuid, + enable_mdb_writemap: self.enable_mdb_writemap, map_size: self.map_size, generation: self.generation, }) @@ -76,6 +79,7 @@ impl ClosingIndex { pub struct ReopenableIndex { uuid: Uuid, + enable_mdb_writemap: bool, map_size: usize, generation: usize, } @@ -103,7 +107,7 @@ impl ReopenableIndex { return Ok(()); } map.unavailable.remove(&self.uuid); - map.create(&self.uuid, path, None, self.map_size)?; + map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?; } Ok(()) } @@ -170,16 +174,17 @@ impl IndexMap { uuid: &Uuid, path: &Path, date: Option<(OffsetDateTime, OffsetDateTime)>, + enable_mdb_writemap: bool, map_size: usize, ) -> Result<Index> { if !matches!(self.get_unavailable(uuid), Missing) { panic!("Attempt to open an index that was unavailable"); } - let index = create_or_open_index(path, date, map_size)?; + let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?; match self.available.insert(*uuid, index.clone()) { InsertionOutcome::InsertedNew => (), InsertionOutcome::Evicted(evicted_uuid, evicted_index) => { - self.close(evicted_uuid, evicted_index, 0); + self.close(evicted_uuid, evicted_index, enable_mdb_writemap, 0); }
InsertionOutcome::Replaced(_) => { panic!("Attempt to open an index that was already opened") @@ -212,17 +217,30 @@ impl IndexMap { /// | Closing | Closing | /// | Available | Closing | /// - pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) { + pub fn close_for_resize( + &mut self, + uuid: &Uuid, + enable_mdb_writemap: bool, + map_size_growth: usize, + ) { let Some(index) = self.available.remove(uuid) else { return; }; - self.close(*uuid, index, map_size_growth); + self.close(*uuid, index, enable_mdb_writemap, map_size_growth); } - fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) { + fn close( + &mut self, + uuid: Uuid, + index: Index, + enable_mdb_writemap: bool, + map_size_growth: usize, + ) { let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth; let closing_event = index.prepare_for_closing(); let generation = self.next_generation(); - self.unavailable - .insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation })); + self.unavailable.insert( + uuid, + Some(ClosingIndex { uuid, closing_event, enable_mdb_writemap, map_size, generation }), + ); } /// Attempts to delete an index. @@ -282,11 +300,15 @@ impl IndexMap { fn create_or_open_index( path: &Path, date: Option<(OffsetDateTime, OffsetDateTime)>, + enable_mdb_writemap: bool, map_size: usize, ) -> Result<Index> { let mut options = EnvOpenOptions::new(); options.map_size(clamp_to_page_size(map_size)); options.max_readers(1024); + if enable_mdb_writemap { + unsafe { options.flag(Flags::MdbWriteMap) }; + } if let Some((created, updated)) = date { Ok(Index::new_with_creation_dates(options, path, created, updated)?) diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs index 2bf6f46ad..18aed42b0 100644 --- a/index-scheduler/src/index_mapper/mod.rs +++ b/index-scheduler/src/index_mapper/mod.rs @@ -66,6 +66,8 @@ pub struct IndexMapper { index_base_map_size: usize, /// The quantity by which the map size of an index is incremented upon reopening, in bytes. index_growth_amount: usize, + /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not. + enable_mdb_writemap: bool, pub indexer_config: Arc<IndexerConfig>, } @@ -88,8 +90,17 @@ pub enum IndexStatus { pub struct IndexStats { /// Number of documents in the index. pub number_of_documents: u64, - /// Size of the index' DB, in bytes. + /// Size taken up by the index's DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size`, which only includes the size of the used pages. pub database_size: u64, + /// Size taken by the used pages of the index's DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. pub used_database_size: u64, /// Association of every field name with the number of times it occurs in the documents. pub field_distribution: FieldDistribution, /// Creation date of the index. @@ -105,10 +116,10 @@ impl IndexStats { /// /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> { - let database_size = index.on_disk_size()?; Ok(IndexStats { number_of_documents: index.number_of_documents(rtxn)?, - database_size, + database_size: index.on_disk_size()?, + used_database_size: index.used_size()?, field_distribution: index.field_distribution(rtxn)?, created_at: index.created_at(rtxn)?, updated_at: index.updated_at(rtxn)?, @@ -123,15 +134,22 @@ impl IndexMapper { index_base_map_size: usize, index_growth_amount: usize, index_count: usize, + enable_mdb_writemap: bool, indexer_config: IndexerConfig, ) -> Result<Self> { + let mut wtxn = env.write_txn()?; + let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?; + let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?; + wtxn.commit()?; + Ok(Self { index_map: Arc::new(RwLock::new(IndexMap::new(index_count))), - index_mapping: env.create_database(Some(INDEX_MAPPING))?, - index_stats: env.create_database(Some(INDEX_STATS))?, + index_mapping, + index_stats, base_path, index_base_map_size, index_growth_amount, + enable_mdb_writemap, indexer_config: Arc::new(indexer_config), }) } @@ -162,6 +180,7 @@ impl IndexMapper { &uuid, &index_path, date, + self.enable_mdb_writemap, self.index_base_map_size, )?; @@ -273,7 +292,11 @@ impl IndexMapper { .ok_or_else(|| Error::IndexNotFound(name.to_string()))?; // We remove the index from the in-memory index map. - self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount); + self.index_map.write().unwrap().close_for_resize( + &uuid, + self.enable_mdb_writemap, + self.index_growth_amount, + ); Ok(()) } @@ -338,6 +361,7 @@ impl IndexMapper { &uuid, &index_path, None, + self.enable_mdb_writemap, self.index_base_map_size, )?; } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index af20ba1ae..0f0ecf52a 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -31,7 +31,7 @@ mod uuid_codec; pub type Result<T> = std::result::Result<T, Error>; pub type TaskId = u32; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use std::ops::{Bound, RangeBounds}; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; @@ -233,6 +233,8 @@ pub struct IndexSchedulerOptions { pub task_db_size: usize, /// The size, in bytes, with which each meilisearch index is opened the first time. pub index_base_map_size: usize, + /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not. + pub enable_mdb_writemap: bool, /// The size, in bytes, by which the map size of an index is increased when it is resized due to being full. pub index_growth_amount: usize, /// The number of indexes that can be concurrently opened in memory.
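The hunks above thread a single `enable_mdb_writemap` boolean from the scheduler options down to every LMDB environment opened for an index. As a minimal sketch of what that boolean ultimately toggles (assuming the heed API re-exported by `meilisearch-types`, exactly as used in the `index_map.rs` hunk; `open_index_env` is a hypothetical helper, not part of this diff):

```rust
use std::path::Path;

use meilisearch_types::heed::flags::Flags;
use meilisearch_types::heed::{Env, EnvOpenOptions, Result};

// Hypothetical helper mirroring `create_or_open_index` above.
fn open_index_env(path: &Path, map_size: usize, enable_mdb_writemap: bool) -> Result<Env> {
    let mut options = EnvOpenOptions::new();
    options.map_size(map_size);
    options.max_readers(1024);
    if enable_mdb_writemap {
        // MDB_WRITEMAP makes LMDB write through a shared writeable memory map
        // instead of private dirty pages, lowering resident memory during
        // indexing; heed marks the flag unsafe because it weakens LMDB's
        // protection against stray writes into the map.
        unsafe { options.flag(Flags::MdbWriteMap) };
    }
    options.open(path)
}
```

This is also why the next hunk panics when the flag is set on Windows: LMDB does not support `MDB_WRITEMAP` there, so the option is rejected up front.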
@@ -374,6 +376,11 @@ impl IndexScheduler { std::fs::create_dir_all(&options.indexes_path)?; std::fs::create_dir_all(&options.dumps_path)?; + if cfg!(windows) && options.enable_mdb_writemap { + // programmer error if this happens: in normal use passing the option on Windows is an error in main + panic!("Windows doesn't support the MDB_WRITEMAP LMDB option"); + } + let task_db_size = clamp_to_page_size(options.task_db_size); let budget = if options.indexer_config.skip_index_budget { IndexBudget { @@ -396,25 +403,37 @@ impl IndexScheduler { .open(options.tasks_path)?; let file_store = FileStore::new(&options.update_file_path)?; + let mut wtxn = env.write_txn()?; + let all_tasks = env.create_database(&mut wtxn, Some(db_name::ALL_TASKS))?; + let status = env.create_database(&mut wtxn, Some(db_name::STATUS))?; + let kind = env.create_database(&mut wtxn, Some(db_name::KIND))?; + let index_tasks = env.create_database(&mut wtxn, Some(db_name::INDEX_TASKS))?; + let canceled_by = env.create_database(&mut wtxn, Some(db_name::CANCELED_BY))?; + let enqueued_at = env.create_database(&mut wtxn, Some(db_name::ENQUEUED_AT))?; + let started_at = env.create_database(&mut wtxn, Some(db_name::STARTED_AT))?; + let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?; + wtxn.commit()?; + // allow unreachable_code to get rid of the warning in the case of a test build. let this = Self { must_stop_processing: MustStopProcessing::default(), processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), file_store, - all_tasks: env.create_database(Some(db_name::ALL_TASKS))?, - status: env.create_database(Some(db_name::STATUS))?, - kind: env.create_database(Some(db_name::KIND))?, - index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?, - canceled_by: env.create_database(Some(db_name::CANCELED_BY))?, - enqueued_at: env.create_database(Some(db_name::ENQUEUED_AT))?, - started_at: env.create_database(Some(db_name::STARTED_AT))?, - finished_at: env.create_database(Some(db_name::FINISHED_AT))?, + all_tasks, + status, + kind, + index_tasks, + canceled_by, + enqueued_at, + started_at, + finished_at, index_mapper: IndexMapper::new( &env, options.indexes_path, budget.map_size, options.index_growth_amount, budget.index_count, + options.enable_mdb_writemap, options.indexer_config, )?, env, @@ -554,10 +573,16 @@ impl IndexScheduler { &self.index_mapper.indexer_config } + /// Return the real database size (i.e. the size **with** the free pages) pub fn size(&self) -> Result<u64> { Ok(self.env.real_disk_size()?) } + /// Return the used database size (i.e. the size **without** the free pages) + pub fn used_size(&self) -> Result<u64> { + Ok(self.env.non_free_pages_size()?) + } + /// Return the index corresponding to the name. /// /// * If the index wasn't opened before, the index will be opened. @@ -737,6 +762,38 @@ impl IndexScheduler { Ok(tasks) } + /// The returned structure contains: + /// 1. The name of the property being observed: `statuses`, `types`, or `indexes`. + /// 2. The name of the specific entry for that property: e.g. `enqueued` for `statuses`, `settingsUpdate` for `types`, or an index name for `indexes`. + /// 3. The number of tasks matching that entry.
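To make the shape of that return value concrete, here is a small consumer sketch (not part of the diff; the outer keys are the real ones built below, the counts are hypothetical):

```rust
use std::collections::BTreeMap;

// `stats` is what `get_stats` returns: property -> (entry -> count), e.g.
//   stats["statuses"]["enqueued"] == 2
//   stats["types"]["documentAdditionOrUpdate"] == 3
//   stats["indexes"]["doggos"] == 2
fn print_stats(stats: BTreeMap<String, BTreeMap<String, u64>>) {
    for (property, entries) in &stats {
        for (name, count) in entries {
            println!("{property}.{name} = {count}");
        }
    }
}
```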
+ pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> { + let rtxn = self.read_txn()?; + + let mut res = BTreeMap::new(); + + res.insert( + "statuses".to_string(), + enum_iterator::all::<Status>() + .map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len()))) + .collect::<Result<BTreeMap<String, u64>>>()?, + ); + res.insert( + "types".to_string(), + enum_iterator::all::<Kind>() + .map(|s| Ok((s.to_string(), self.get_kind(&rtxn, s)?.len()))) + .collect::<Result<BTreeMap<String, u64>>>()?, + ); + res.insert( + "indexes".to_string(), + self.index_tasks + .iter(&rtxn)? + .map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?)) + .collect::<Result<BTreeMap<String, u64>>>()?, + ); + + Ok(res) + } + /// Return true iff there is at least one task associated with this index /// that is processing. pub fn is_index_processing(&self, index: &str) -> Result<bool> { @@ -1471,6 +1528,7 @@ mod tests { dumps_path: tempdir.path().join("dumps"), task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. + enable_mdb_writemap: false, index_growth_amount: 1000 * 1000, // 1 MB index_count: 5, indexer_config, @@ -2017,6 +2075,105 @@ mod tests { snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded"); } + #[test] + fn document_addition_and_document_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap(); + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + index_scheduler + .register(KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); + } + + #[test] + fn document_deletion_and_document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + index_scheduler + .register(KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1"), S("2")], + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), 
file.as_file_mut()).unwrap(); + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + // The deletion should have failed because it can't create an index + handle.advance_one_failed_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion"); + + // The addition should work + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::<Vec<_>>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::<Vec<_>>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); + } + #[test] fn do_not_batch_task_of_different_indexes() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap new file mode 100644 index 000000000..b27288a0f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"documentDeletion" [1,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,] +---------------------------------------------------------------------- +### File Store: 
+---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap new file mode 100644 index 000000000..2b56b71d1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap @@ -0,0 +1,9 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 3, + "doggo": "bork" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap new file mode 100644 index 000000000..d26e62bff --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap new file mode 100644 index 000000000..e0f371120 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} 
+---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"documentDeletion" [1,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap new file mode 100644 index 000000000..1d4aa24e2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap 
b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap new file mode 100644 index 000000000..0f9dfd3e6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap @@ -0,0 +1,46 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [1,] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap new file mode 100644 index 000000000..8204d059b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap @@ -0,0 +1,17 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 1, + "doggo": "jean bob" + }, + { + "id": 2, + "catto": "jorts" + }, + { + "id": 3, + "doggo": "bork" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap new file mode 100644 index 000000000..5753db7e6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, 
status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentDeletion" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap new file mode 100644 index 000000000..0b6191f9e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs index 97f437bed..3971d9116 100644 --- a/index-scheduler/src/utils.rs +++ b/index-scheduler/src/utils.rs @@ -466,7 +466,7 @@ impl IndexScheduler { } } Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => { - 
assert_eq!(kind.as_kind(), Kind::DocumentDeletionByFilter); + assert_eq!(kind.as_kind(), Kind::DocumentDeletion); let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter { ref index_uid, ref filter_expr, diff --git a/meilisearch-auth/src/lib.rs b/meilisearch-auth/src/lib.rs index 2a02776bd..e74f1707c 100644 --- a/meilisearch-auth/src/lib.rs +++ b/meilisearch-auth/src/lib.rs @@ -45,6 +45,11 @@ impl AuthController { self.store.size() } + /// Return the used size of the `AuthController` database in bytes. + pub fn used_size(&self) -> Result<u64> { + self.store.used_size() + } + pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> { match self.store.get_api_key(create_key.uid)? { Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())), diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 5c2776154..e6e30d18d 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -55,9 +55,11 @@ impl HeedAuthStore { let path = path.as_ref().join(AUTH_DB_PATH); create_dir_all(&path)?; let env = Arc::new(open_auth_store_env(path.as_ref())?); - let keys = env.create_database(Some(KEY_DB_NAME))?; + let mut wtxn = env.write_txn()?; + let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?; let action_keyid_index_expiration = - env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; + env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; + wtxn.commit()?; Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true }) } @@ -73,6 +75,11 @@ impl HeedAuthStore { Ok(self.env.real_disk_size()?) } + /// Return the number of bytes actually used in the database + pub fn used_size(&self) -> Result<u64> { + Ok(self.env.non_free_pages_size()?)
+ } + pub fn set_drop_on_close(&mut self, v: bool) { self.should_close_on_drop = v; } diff --git a/meilisearch-types/src/deserr/mod.rs b/meilisearch-types/src/deserr/mod.rs index 3e6ec8b96..bbaa42dc0 100644 --- a/meilisearch-types/src/deserr/mod.rs +++ b/meilisearch-types/src/deserr/mod.rs @@ -150,6 +150,7 @@ make_missing_field_convenience_builder!(MissingApiKeyActions, missing_api_key_ac make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_expires_at); make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes); make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes); +make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter); // Integrate a sub-error into a [`DeserrError`] by taking its error message but using // the default error code (C) from `Self` diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index bcd8320c9..1509847b7 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -214,12 +214,12 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ; InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ; InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ; InvalidDocumentFields , InvalidRequest , BAD_REQUEST ; +MissingDocumentFilter , InvalidRequest , BAD_REQUEST ; InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ; InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ; InvalidDocumentId , InvalidRequest , BAD_REQUEST ; InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ; InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ; -InvalidDocumentDeleteFilter , InvalidRequest , BAD_REQUEST ; InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs index e746a53b8..693ee4242 100644 --- a/meilisearch-types/src/tasks.rs +++ b/meilisearch-types/src/tasks.rs @@ -395,7 +395,6 @@ impl std::error::Error for ParseTaskStatusError {} pub enum Kind { DocumentAdditionOrUpdate, DocumentDeletion, - DocumentDeletionByFilter, SettingsUpdate, IndexCreation, IndexDeletion, @@ -412,7 +411,6 @@ impl Kind { match self { Kind::DocumentAdditionOrUpdate | Kind::DocumentDeletion - | Kind::DocumentDeletionByFilter | Kind::SettingsUpdate | Kind::IndexCreation | Kind::IndexDeletion @@ -430,7 +428,6 @@ impl Display for Kind { match self { Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"), Kind::DocumentDeletion => write!(f, "documentDeletion"), - Kind::DocumentDeletionByFilter => write!(f, "documentDeletionByFilter"), Kind::SettingsUpdate => write!(f, "settingsUpdate"), Kind::IndexCreation => write!(f, "indexCreation"), Kind::IndexDeletion => write!(f, "indexDeletion"), diff --git a/meilisearch/src/analytics/mock_analytics.rs b/meilisearch/src/analytics/mock_analytics.rs index 03aed0189..68c3a7dff 100644 --- a/meilisearch/src/analytics/mock_analytics.rs +++ b/meilisearch/src/analytics/mock_analytics.rs @@ -5,7 +5,7 @@ use actix_web::HttpRequest; use meilisearch_types::InstanceUid; use serde_json::Value; -use super::{find_user_id, Analytics, DocumentDeletionKind}; +use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::routes::tasks::TasksFilterQuery; use crate::Opt; @@ -71,6 +71,8 @@ impl Analytics for MockAnalytics { _request: &HttpRequest, ) { } 
+ fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} + fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {} fn get_tasks(&self, _query: &TasksFilterQuery, _request: &HttpRequest) {} fn health_seen(&self, _request: &HttpRequest) {} } diff --git a/meilisearch/src/analytics/mod.rs b/meilisearch/src/analytics/mod.rs index 6223b9db7..c48564dff 100644 --- a/meilisearch/src/analytics/mod.rs +++ b/meilisearch/src/analytics/mod.rs @@ -67,6 +67,12 @@ pub enum DocumentDeletionKind { PerFilter, } +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum DocumentFetchKind { + PerDocumentId, + Normal { with_filter: bool, limit: usize, offset: usize }, +} + pub trait Analytics: Sync + Send { fn instance_uid(&self) -> Option<&InstanceUid>; @@ -90,6 +96,12 @@ pub trait Analytics: Sync + Send { request: &HttpRequest, ); + // this method should be called to aggregate a fetch documents request + fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); + + // this method should be called to aggregate a fetch documents request + fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest); + // this method should be called to aggregate a add documents request fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest); diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 3e40c09e8..afef95ed7 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -23,7 +23,9 @@ use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; -use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH}; +use super::{ + config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH, +}; use crate::analytics::Analytics; use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot}; use crate::routes::indexes::documents::UpdateDocumentsQuery; @@ -72,6 +74,8 @@ pub enum AnalyticsMsg { AggregateAddDocuments(DocumentsAggregator), AggregateDeleteDocuments(DocumentsDeletionAggregator), AggregateUpdateDocuments(DocumentsAggregator), + AggregateGetFetchDocuments(DocumentsFetchAggregator), + AggregatePostFetchDocuments(DocumentsFetchAggregator), AggregateTasks(TasksAggregator), AggregateHealth(HealthAggregator), } @@ -139,6 +143,8 @@ impl SegmentAnalytics { add_documents_aggregator: DocumentsAggregator::default(), delete_documents_aggregator: DocumentsDeletionAggregator::default(), update_documents_aggregator: DocumentsAggregator::default(), + get_fetch_documents_aggregator: DocumentsFetchAggregator::default(), + post_fetch_documents_aggregator: DocumentsFetchAggregator::default(), get_tasks_aggregator: TasksAggregator::default(), health_aggregator: HealthAggregator::default(), }); @@ -205,6 +211,16 @@ impl super::Analytics for SegmentAnalytics { let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); } + fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { + let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); + let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate)); + } + + fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) { + let aggregate = DocumentsFetchAggregator::from_query(documents_query, request); + 
diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs
index 3e40c09e8..afef95ed7 100644
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -23,7 +23,9 @@ use tokio::select;
 use tokio::sync::mpsc::{self, Receiver, Sender};
 use uuid::Uuid;

-use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH};
+use super::{
+    config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH,
+};
 use crate::analytics::Analytics;
 use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
 use crate::routes::indexes::documents::UpdateDocumentsQuery;
@@ -72,6 +74,8 @@ pub enum AnalyticsMsg {
     AggregateAddDocuments(DocumentsAggregator),
     AggregateDeleteDocuments(DocumentsDeletionAggregator),
     AggregateUpdateDocuments(DocumentsAggregator),
+    AggregateGetFetchDocuments(DocumentsFetchAggregator),
+    AggregatePostFetchDocuments(DocumentsFetchAggregator),
     AggregateTasks(TasksAggregator),
     AggregateHealth(HealthAggregator),
 }
@@ -139,6 +143,8 @@ impl SegmentAnalytics {
             add_documents_aggregator: DocumentsAggregator::default(),
             delete_documents_aggregator: DocumentsDeletionAggregator::default(),
             update_documents_aggregator: DocumentsAggregator::default(),
+            get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
+            post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
             get_tasks_aggregator: TasksAggregator::default(),
             health_aggregator: HealthAggregator::default(),
         });
@@ -205,6 +211,16 @@ impl super::Analytics for SegmentAnalytics {
         let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
     }

+    fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
+        let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
+        let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate));
+    }
+
+    fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
+        let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
+        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate));
+    }
+
     fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest) {
         let aggregate = TasksAggregator::from_query(query, request);
         let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate));
@@ -225,6 +241,7 @@ struct Infos {
     env: String,
     experimental_enable_metrics: bool,
+    experimental_reduce_indexing_memory_usage: bool,
     db_path: bool,
     import_dump: bool,
     dump_dir: bool,
@@ -258,6 +275,7 @@ impl From<Opt> for Infos {
         let Opt {
             db_path,
             experimental_enable_metrics,
+            experimental_reduce_indexing_memory_usage,
             http_addr,
             master_key: _,
             env,
@@ -300,6 +318,7 @@ impl From<Opt> for Infos {
         Self {
             env,
             experimental_enable_metrics,
+            experimental_reduce_indexing_memory_usage,
             db_path: db_path != PathBuf::from("./data.ms"),
             import_dump: import_dump.is_some(),
             dump_dir: dump_dir != PathBuf::from("dumps/"),
@@ -338,6 +357,8 @@ pub struct Segment {
     add_documents_aggregator: DocumentsAggregator,
     delete_documents_aggregator: DocumentsDeletionAggregator,
     update_documents_aggregator: DocumentsAggregator,
+    get_fetch_documents_aggregator: DocumentsFetchAggregator,
+    post_fetch_documents_aggregator: DocumentsFetchAggregator,
     get_tasks_aggregator: TasksAggregator,
     health_aggregator: HealthAggregator,
 }
@@ -400,6 +421,8 @@ impl Segment {
                 Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
+                Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
+                Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
                 Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
                 None => (),
@@ -450,6 +473,10 @@ impl Segment {
             .into_event(&self.user, "Documents Deleted");
         let update_documents = std::mem::take(&mut self.update_documents_aggregator)
             .into_event(&self.user, "Documents Updated");
+        let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator)
+            .into_event(&self.user, "Documents Fetched GET");
+        let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator)
+            .into_event(&self.user, "Documents Fetched POST");
         let get_tasks =
             std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
         let health =
@@ -473,6 +500,12 @@ impl Segment {
         if let Some(update_documents) = update_documents {
             let _ = self.batcher.push(update_documents).await;
         }
+        if let Some(get_fetch_documents) = get_fetch_documents {
+            let _ = self.batcher.push(get_fetch_documents).await;
+        }
+        if let Some(post_fetch_documents) = post_fetch_documents {
+            let _ = self.batcher.push(post_fetch_documents).await;
+        }
         if let Some(get_tasks) = get_tasks {
             let _ = self.batcher.push(get_tasks).await;
         }
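All the aggregators above follow the same fire-and-forget pattern: handlers `try_send` into a bounded channel and a background task folds the events until the next flush, so a full channel drops an event instead of blocking a request. A minimal sketch of that pattern, assuming the tokio runtime the codebase already uses; all names here are illustrative.

use tokio::sync::mpsc;

#[derive(Default, Debug)]
struct FetchAggregate {
    total_received: usize,
    max_limit: usize,
}

impl FetchAggregate {
    fn aggregate(&mut self, other: FetchAggregate) {
        self.total_received += other.total_received;
        self.max_limit = self.max_limit.max(other.max_limit);
    }
}

#[tokio::main]
async fn main() {
    let (sender, mut receiver) = mpsc::channel::<FetchAggregate>(100);

    // In a handler: never await, never fail the request over analytics.
    let _ = sender.try_send(FetchAggregate { total_received: 1, max_limit: 20 });
    let _ = sender.try_send(FetchAggregate { total_received: 1, max_limit: 50 });
    drop(sender);

    // In the background task: fold everything received since the last flush.
    let mut batch = FetchAggregate::default();
    while let Some(event) = receiver.recv().await {
        batch.aggregate(event);
    }
    assert_eq!((batch.total_received, batch.max_limit), (2, 50));
}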
@@ -1135,3 +1168,76 @@ impl HealthAggregator {
         })
     }
 }
+
+#[derive(Default, Serialize)]
+pub struct DocumentsFetchAggregator {
+    #[serde(skip)]
+    timestamp: Option<OffsetDateTime>,
+
+    // context
+    #[serde(rename = "user-agent")]
+    user_agents: HashSet<String>,
+
+    #[serde(rename = "requests.max_limit")]
+    total_received: usize,
+
+    // a call on ../documents/:doc_id
+    per_document_id: bool,
+    // if a filter was used
+    per_filter: bool,
+
+    // pagination
+    #[serde(rename = "pagination.max_limit")]
+    max_limit: usize,
+    #[serde(rename = "pagination.max_offset")]
+    max_offset: usize,
+}
+
+impl DocumentsFetchAggregator {
+    pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self {
+        let (limit, offset) = match query {
+            DocumentFetchKind::PerDocumentId => (1, 0),
+            DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset),
+        };
+        Self {
+            timestamp: Some(OffsetDateTime::now_utc()),
+            user_agents: extract_user_agents(request).into_iter().collect(),
+            total_received: 1,
+            per_document_id: matches!(query, DocumentFetchKind::PerDocumentId),
+            per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
+            max_limit: limit,
+            max_offset: offset,
+        }
+    }
+
+    /// Aggregate one [DocumentsFetchAggregator] into another.
+    pub fn aggregate(&mut self, other: Self) {
+        if self.timestamp.is_none() {
+            self.timestamp = other.timestamp;
+        }
+        for user_agent in other.user_agents {
+            self.user_agents.insert(user_agent);
+        }
+
+        self.total_received = self.total_received.saturating_add(other.total_received);
+        self.per_document_id |= other.per_document_id;
+        self.per_filter |= other.per_filter;
+
+        self.max_limit = self.max_limit.max(other.max_limit);
+        self.max_offset = self.max_offset.max(other.max_offset);
+    }
+
+    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
+        // if we had no timestamp it means we never encountered any events and
+        // thus we don't need to send this event.
+        let timestamp = self.timestamp?;
+
+        Some(Track {
+            timestamp: Some(timestamp),
+            user: user.clone(),
+            event: event_name.to_string(),
+            properties: serde_json::to_value(self).ok()?,
+            ..Default::default()
+        })
+    }
+}
diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs
index 004f0d143..ca10c4593 100644
--- a/meilisearch/src/error.rs
+++ b/meilisearch/src/error.rs
@@ -61,7 +61,7 @@ impl ErrorCode for MeilisearchHttpError {
             MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
             MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
             MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
-            MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentDeleteFilter,
+            MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentFilter,
             MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
             MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
             MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs
index 67d8bbd5c..bee53f6f8 100644
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -232,6 +232,7 @@ fn open_or_create_database_unchecked(
         dumps_path: opt.dump_dir.clone(),
         task_db_size: opt.max_task_db_size.get_bytes() as usize,
         index_base_map_size: opt.max_index_size.get_bytes() as usize,
+        enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
         indexer_config: (&opt.indexer_options).try_into()?,
         autobatching_enabled: true,
         max_number_of_tasks: 1_000_000,
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index 2ab37488c..1b5e918dc 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -29,6 +29,11 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
 async fn main() -> anyhow::Result<()> {
     let (opt, config_read_from) = Opt::try_build()?;

+    anyhow::ensure!(
+        !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
+        "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
+    );
+
     setup(&opt)?;

     match (opt.env.as_ref(), &opt.master_key) {
diff --git a/meilisearch/src/metrics.rs b/meilisearch/src/metrics.rs
index f6fdf756d..79332f360 100644
--- a/meilisearch/src/metrics.rs
+++ b/meilisearch/src/metrics.rs
@@ -4,20 +4,32 @@ use prometheus::{
     register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
 };

-const HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: &[f64; 14] = &[
-    0.0005, 0.0008, 0.00085, 0.0009, 0.00095, 0.001, 0.00105, 0.0011, 0.00115, 0.0012, 0.0015,
-    0.002, 0.003, 1.0,
-];
+/// Create evenly distributed buckets
+fn create_buckets() -> [f64; 29] {
+    (0..10)
+        .chain((10..100).step_by(10))
+        .chain((100..=1000).step_by(100))
+        .map(|i| i as f64 / 1000.)
+        .collect::<Vec<_>>()
+        .try_into()
+        .unwrap()
+}

 lazy_static! {
-    pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
-        opts!("http_requests_total", "HTTP requests total"),
+    pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 29] = create_buckets();
+    pub static ref MEILISEARCH_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
+        opts!("meilisearch_http_requests_total", "Meilisearch HTTP requests total"),
         &["method", "path"]
     )
     .expect("Can't create a metric");
     pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
-        register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch Db Size In Bytes"))
+        register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
             .expect("Can't create a metric");
+    pub static ref MEILISEARCH_USED_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!(
+        "meilisearch_used_db_size_bytes",
+        "Meilisearch Used DB Size In Bytes"
+    ))
+    .expect("Can't create a metric");
     pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
         register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count"))
             .expect("Can't create a metric");
@@ -26,11 +38,16 @@ lazy_static! {
         &["index"]
     )
     .expect("Can't create a metric");
-    pub static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
+    pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
         "http_response_time_seconds",
         "HTTP response times",
         &["method", "path"],
         HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec()
     )
     .expect("Can't create a metric");
+    pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!(
+        opts!("meilisearch_nb_tasks", "Meilisearch Number of tasks"),
+        &["kind", "value"]
+    )
+    .expect("Can't create a metric");
 }
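To make the new bucket layout concrete, this standalone sketch evaluates the same iterator chain and checks the resulting shape: 1 ms steps under 10 ms, 10 ms steps under 100 ms, then 100 ms steps up to 1 s, for 29 buckets in total.

fn create_buckets() -> Vec<f64> {
    (0..10)
        .chain((10..100).step_by(10))
        .chain((100..=1000).step_by(100))
        .map(|i| i as f64 / 1000.)
        .collect()
}

fn main() {
    let buckets = create_buckets();
    assert_eq!(buckets.len(), 29);           // 10 + 9 + 10 boundaries
    assert_eq!(buckets.first(), Some(&0.0)); // 0 ms
    assert_eq!(buckets[10], 0.010);          // first 10 ms step
    assert_eq!(buckets.last(), Some(&1.0));  // 1 s
}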
{ &["index"] ) .expect("Can't create a metric"); - pub static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!( + pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!( "http_response_time_seconds", "HTTP response times", &["method", "path"], HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec() ) .expect("Can't create a metric"); + pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!( + opts!("meilisearch_nb_tasks", "Meilisearch Number of tasks"), + &["kind", "value"] + ) + .expect("Can't create a metric"); } diff --git a/meilisearch/src/middleware.rs b/meilisearch/src/middleware.rs index 080a52634..a8c981dca 100644 --- a/meilisearch/src/middleware.rs +++ b/meilisearch/src/middleware.rs @@ -52,11 +52,11 @@ where if is_registered_resource { let request_method = req.method().to_string(); histogram_timer = Some( - crate::metrics::HTTP_RESPONSE_TIME_SECONDS + crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS .with_label_values(&[&request_method, request_path]) .start_timer(), ); - crate::metrics::HTTP_REQUESTS_TOTAL + crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL .with_label_values(&[&request_method, request_path]) .inc(); } diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index 8e6ca9006..0511b5033 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -48,6 +48,8 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; +const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = + "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE"; const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; const DEFAULT_DB_PATH: &str = "./data.ms"; @@ -293,6 +295,11 @@ pub struct Opt { #[serde(default)] pub experimental_enable_metrics: bool, + /// Experimental RAM reduction during indexing, do not use in production, see: + #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)] + #[serde(default)] + pub experimental_reduce_indexing_memory_usage: bool, + #[serde(flatten)] #[clap(flatten)] pub indexer_options: IndexerOpts, @@ -385,6 +392,7 @@ impl Opt { #[cfg(all(not(debug_assertions), feature = "analytics"))] no_analytics, experimental_enable_metrics: enable_metrics_route, + experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage, } = self; export_to_env_if_not_present(MEILI_DB_PATH, db_path); export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); @@ -426,6 +434,10 @@ impl Opt { MEILI_EXPERIMENTAL_ENABLE_METRICS, enable_metrics_route.to_string(), ); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE, + reduce_indexing_memory_usage.to_string(), + ); indexer_options.export_to_env(); } diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index eb0f5a59e..2afc1b5fb 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -29,7 +29,7 @@ use tempfile::tempfile; use tokio::fs::File; use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter}; -use crate::analytics::{Analytics, DocumentDeletionKind}; +use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind}; use crate::error::MeilisearchHttpError; use crate::error::PayloadError::ReceivePayload; use crate::extractors::authentication::policies::*; @@ -97,10 +97,14 @@ pub async 
diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs
index eb0f5a59e..2afc1b5fb 100644
--- a/meilisearch/src/routes/indexes/documents.rs
+++ b/meilisearch/src/routes/indexes/documents.rs
@@ -29,7 +29,7 @@ use tempfile::tempfile;
 use tokio::fs::File;
 use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};

-use crate::analytics::{Analytics, DocumentDeletionKind};
+use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
 use crate::error::MeilisearchHttpError;
 use crate::error::PayloadError::ReceivePayload;
 use crate::extractors::authentication::policies::*;
@@ -97,10 +97,14 @@ pub async fn get_document(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
     document_param: web::Path<DocumentParam>,
     params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let DocumentParam { index_uid, document_id } = document_param.into_inner();
     let index_uid = IndexUid::try_from(index_uid)?;

+    analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
+
     let GetDocument { fields } = params.into_inner();
     let attributes_to_retrieve = fields.merge_star_and_none();

@@ -161,16 +165,31 @@ pub async fn documents_by_query_post(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
     body: AwebJson<BrowseQuery, DeserrJsonError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with body: {:?}", body);

-    documents_by_query(&index_scheduler, index_uid, body.into_inner())
+    let body = body.into_inner();
+
+    analytics.post_fetch_documents(
+        &DocumentFetchKind::Normal {
+            with_filter: body.filter.is_some(),
+            limit: body.limit,
+            offset: body.offset,
+        },
+        &req,
+    );
+
+    documents_by_query(&index_scheduler, index_uid, body)
 }

 pub async fn get_documents(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
     params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", params);

@@ -191,6 +210,15 @@ pub async fn get_documents(
         filter,
     };

+    analytics.get_fetch_documents(
+        &DocumentFetchKind::Normal {
+            with_filter: query.filter.is_some(),
+            limit: query.limit,
+            offset: query.offset,
+        },
+        &req,
+    );
+
     documents_by_query(&index_scheduler, index_uid, query)
 }

@@ -458,7 +486,7 @@ pub async fn delete_documents_batch(
 #[derive(Debug, Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 pub struct DocumentDeletionByFilter {
-    #[deserr(error = DeserrJsonError<InvalidDocumentFilter>)]
+    #[deserr(error = DeserrJsonError<InvalidDocumentFilter>, missing_field_error = DeserrJsonError::missing_document_filter)]
     filter: Value,
 }

@@ -480,8 +508,8 @@ pub async fn delete_documents_by_filter(
         || -> Result<_, ResponseError> {
             Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
         }()
-        // and whatever was the error, the error code should always be an InvalidDocumentDeleteFilter
-        .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentDeleteFilter))?;
+        // and whatever was the error, the error code should always be an InvalidDocumentFilter
+        .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
     let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };

     let task: SummarizedTaskView =
@@ -540,7 +568,12 @@ fn retrieve_documents<S: AsRef<str>>(
     };

     let candidates = if let Some(filter) = filter {
-        filter.evaluate(&rtxn, index)?
+        filter.evaluate(&rtxn, index).map_err(|err| match err {
+            milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
+                ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
+            }
+            e => e.into(),
+        })?
     } else {
         index.documents_ids(&rtxn)?
     };
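The `retrieve_documents` change above remaps an invalid-filter error coming out of milli to the document-scoped `invalid_document_filter` code instead of the search one. A simplified, self-contained sketch of that remapping, with stand-in types for `milli::Error` and `ResponseError`:

#[derive(Debug)]
enum MilliError {
    InvalidFilter(String),
    Other(String),
}

#[derive(Debug, PartialEq)]
struct ResponseError {
    code: &'static str,
    message: String,
}

fn remap(err: MilliError) -> ResponseError {
    match err {
        // the filter itself was bad: report a document-filter error
        MilliError::InvalidFilter(msg) => {
            ResponseError { code: "invalid_document_filter", message: msg }
        }
        // anything else keeps its usual conversion
        MilliError::Other(msg) => ResponseError { code: "internal", message: msg },
    }
}

fn main() {
    let err = remap(MilliError::InvalidFilter("`doggo` is not filterable".into()));
    assert_eq!(err.code, "invalid_document_filter");
    let other = remap(MilliError::Other("oops".into()));
    assert_eq!(other.code, "internal");
}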
diff --git a/meilisearch/src/routes/metrics.rs b/meilisearch/src/routes/metrics.rs
index 874a1a5a5..a7d41e33e 100644
--- a/meilisearch/src/routes/metrics.rs
+++ b/meilisearch/src/routes/metrics.rs
@@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) {

 pub async fn get_metrics(
     index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
-    auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<AuthController>>,
+    auth_controller: Data<AuthController>,
 ) -> Result<HttpResponse, ResponseError> {
     let auth_filters = index_scheduler.filters();
     if !auth_filters.all_indexes_authorized() {
@@ -28,10 +28,10 @@ pub async fn get_metrics(
         return Err(error);
     }

-    let response =
-        create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), auth_filters)?;
+    let response = create_all_stats((*index_scheduler).clone(), auth_controller, auth_filters)?;

     crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
+    crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
     crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);

     for (index, value) in response.indexes.iter() {
@@ -40,6 +40,14 @@ pub async fn get_metrics(
             .set(value.number_of_documents as i64);
     }

+    for (kind, value) in index_scheduler.get_stats()? {
+        for (value, count) in value {
+            crate::metrics::MEILISEARCH_NB_TASKS
+                .with_label_values(&[&kind, &value])
+                .set(count as i64);
+        }
+    }
+
     let encoder = TextEncoder::new();
     let mut buffer = vec![];
     encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");
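The new task-count gauge assumes `index_scheduler.get_stats()` yields a nested kind-to-value-to-count map; that layout is inferred from the loop above, not from a documented API. A standalone sketch of the flattening into one labeled gauge per pair:

use std::collections::BTreeMap;

fn main() {
    // Assumed shape: e.g. "statuses" -> { "succeeded" -> 12 }, "types" -> { ... }
    let mut stats: BTreeMap<String, BTreeMap<String, u64>> = BTreeMap::new();
    stats.entry("statuses".into()).or_default().insert("succeeded".into(), 12);
    stats.entry("types".into()).or_default().insert("documentAdditionOrUpdate".into(), 7);

    for (kind, values) in &stats {
        for (value, count) in values {
            // stands in for MEILISEARCH_NB_TASKS.with_label_values(&[kind, value]).set(count)
            println!("meilisearch_nb_tasks{{kind={kind:?},value={value:?}}} {count}");
        }
    }
}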
diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs
index 51340ac1b..57d670b5f 100644
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@@ -231,6 +231,8 @@ pub async fn running() -> HttpResponse {
 #[serde(rename_all = "camelCase")]
 pub struct Stats {
     pub database_size: u64,
+    #[serde(skip)]
+    pub used_database_size: u64,
     #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
     pub last_update: Option<OffsetDateTime>,
     pub indexes: BTreeMap<String, indexes::IndexStats>,
@@ -259,6 +261,7 @@ pub fn create_all_stats(
     let mut last_task: Option<OffsetDateTime> = None;
     let mut indexes = BTreeMap::new();
     let mut database_size = 0;
+    let mut used_database_size = 0;

     for index_uid in index_scheduler.index_names()? {
         // Accumulate the size of all indexes, even unauthorized ones, so
         // that the reported database size stays accurate.
         // See the related pull request discussion for context.
         let stats = index_scheduler.index_stats(&index_uid)?;
         database_size += stats.inner_stats.database_size;
+        used_database_size += stats.inner_stats.used_database_size;

         if !filters.is_index_authorized(&index_uid) {
             continue;
@@ -278,10 +282,14 @@ pub fn create_all_stats(
     }

     database_size += index_scheduler.size()?;
+    used_database_size += index_scheduler.used_size()?;
     database_size += auth_controller.size()?;
-    database_size += index_scheduler.compute_update_file_size()?;
+    used_database_size += auth_controller.used_size()?;
+    let update_file_size = index_scheduler.compute_update_file_size()?;
+    database_size += update_file_size;
+    used_database_size += update_file_size;

-    let stats = Stats { database_size, last_update: last_task, indexes };
+    let stats = Stats { database_size, used_database_size, last_update: last_task, indexes };
     Ok(stats)
 }
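A sketch of the bookkeeping `create_all_stats` now performs, with made-up numbers: `database_size` stays the on-disk total while the serde-skipped `used_database_size` only counts pages actually in use, and update files count toward both totals.

fn main() {
    struct IndexStats {
        database_size: u64,
        used_database_size: u64,
    }
    let indexes = [
        IndexStats { database_size: 40 << 20, used_database_size: 22 << 20 },
        IndexStats { database_size: 10 << 20, used_database_size: 9 << 20 },
    ];

    let (mut database_size, mut used_database_size) = (0u64, 0u64);
    for stats in &indexes {
        database_size += stats.database_size;
        used_database_size += stats.used_database_size;
    }
    // update files count fully toward both totals
    let update_file_size = 1 << 20;
    database_size += update_file_size;
    used_database_size += update_file_size;

    assert!(used_database_size <= database_size);
}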
diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs
index cab0f7197..2713d0988 100644
--- a/meilisearch/src/routes/tasks.rs
+++ b/meilisearch/src/routes/tasks.rs
@@ -99,7 +99,7 @@ pub struct DetailsView {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub deleted_tasks: Option<Option<u64>>,
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub original_filter: Option<String>,
+    pub original_filter: Option<Option<String>>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub dump_uid: Option<Option<String>>,
     #[serde(skip_serializing_if = "Option::is_none")]
@@ -131,12 +131,13 @@ impl From<Details> for DetailsView {
         } => DetailsView {
             provided_ids: Some(received_document_ids),
             deleted_documents: Some(deleted_documents),
+            original_filter: Some(None),
             ..DetailsView::default()
         },
         Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
             DetailsView {
                 provided_ids: Some(0),
-                original_filter: Some(original_filter),
+                original_filter: Some(Some(original_filter)),
                 deleted_documents: Some(deleted_documents),
                 ..DetailsView::default()
             }
@@ -148,7 +149,7 @@ impl From<Details> for DetailsView {
             DetailsView {
                 matched_tasks: Some(matched_tasks),
                 canceled_tasks: Some(canceled_tasks),
-                original_filter: Some(original_filter),
+                original_filter: Some(Some(original_filter)),
                 ..DetailsView::default()
             }
         }
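The reason `original_filter` becomes a double `Option` is serialization: the outer `Option` (with `skip_serializing_if`) decides whether the key appears at all, while the inner one lets deletion-by-id tasks report an explicit `"originalFilter": null`. A runnable sketch:

use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct DetailsView {
    #[serde(skip_serializing_if = "Option::is_none")]
    original_filter: Option<Option<String>>,
}

fn main() {
    let absent = DetailsView { original_filter: None };
    let null = DetailsView { original_filter: Some(None) };
    let set = DetailsView { original_filter: Some(Some("\"doggo = bernese\"".into())) };

    assert_eq!(serde_json::to_string(&absent).unwrap(), "{}");
    assert_eq!(serde_json::to_string(&null).unwrap(), r#"{"originalFilter":null}"#);
    assert_eq!(
        serde_json::to_string(&set).unwrap(),
        r#"{"originalFilter":"\"doggo = bernese\""}"#
    );
}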
@@ -156,7 +157,7 @@ impl From<Details> for DetailsView {
             DetailsView {
                 matched_tasks: Some(matched_tasks),
                 deleted_tasks: Some(deleted_tasks),
-                original_filter: Some(original_filter),
+                original_filter: Some(Some(original_filter)),
                 ..DetailsView::default()
             }
         }
@@ -729,7 +730,7 @@ mod tests {
         let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
         snapshot!(meili_snap::json_string!(err), @r###"
         {
-          "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
+          "message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
           "code": "invalid_task_types",
           "type": "invalid_request",
           "link": "https://docs.meilisearch.com/errors#invalid_task_types"
diff --git a/meilisearch/tests/auth/authorization.rs b/meilisearch/tests/auth/authorization.rs
index ef4a7eaa1..58fba4481 100644
--- a/meilisearch/tests/auth/authorization.rs
+++ b/meilisearch/tests/auth/authorization.rs
@@ -16,8 +16,11 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
     ("POST", "/indexes/products/search") => hashset!{"search", "*"},
     ("POST", "/indexes/products/documents") => hashset!{"documents.add", "documents.*", "*"},
     ("GET", "/indexes/products/documents") => hashset!{"documents.get", "documents.*", "*"},
+    ("POST", "/indexes/products/documents/fetch") => hashset!{"documents.get", "documents.*", "*"},
     ("GET", "/indexes/products/documents/0") => hashset!{"documents.get", "documents.*", "*"},
     ("DELETE", "/indexes/products/documents/0") => hashset!{"documents.delete", "documents.*", "*"},
+    ("POST", "/indexes/products/documents/delete-batch") => hashset!{"documents.delete", "documents.*", "*"},
+    ("POST", "/indexes/products/documents/delete") => hashset!{"documents.delete", "documents.*", "*"},
    ("GET", "/tasks") => hashset!{"tasks.get", "tasks.*", "*"},
     ("DELETE", "/tasks") => hashset!{"tasks.delete", "tasks.*", "*"},
     ("GET", "/tasks?indexUid=products") => hashset!{"tasks.get", "tasks.*", "*"},
diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs
index b72dc40f3..7dab16a25 100644
--- a/meilisearch/tests/documents/errors.rs
+++ b/meilisearch/tests/documents/errors.rs
@@ -180,9 +180,9 @@ async fn get_all_documents_bad_filter() {
     snapshot!(json_string!(response), @r###"
     {
       "message": "Attribute `doggo` is not filterable.
This index does not have configured filterable attributes.\n1:6 doggo=bernese", - "code": "invalid_search_filter", + "code": "invalid_document_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" } "###); } @@ -547,9 +547,9 @@ async fn delete_document_by_filter() { snapshot!(json_string!(response), @r###" { "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.", - "code": "invalid_document_delete_filter", + "code": "invalid_document_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" } "###); @@ -559,9 +559,9 @@ async fn delete_document_by_filter() { snapshot!(json_string!(response), @r###" { "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", - "code": "invalid_document_delete_filter", + "code": "invalid_document_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" } "###); @@ -571,9 +571,21 @@ async fn delete_document_by_filter() { snapshot!(json_string!(response), @r###" { "message": "Sending an empty filter is forbidden.", - "code": "invalid_document_delete_filter", + "code": "invalid_document_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_document_delete_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" + } + "###); + + // do not send any filter + let (response, code) = index.delete_document_by_filter(json!({})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Missing field `filter`", + "code": "missing_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_document_filter" } "###); @@ -630,9 +642,9 @@ async fn delete_document_by_filter() { }, "error": { "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese", - "code": "invalid_search_filter", + "code": "invalid_document_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" }, "duration": "[duration]", "enqueuedAt": "[date]", @@ -664,9 +676,9 @@ async fn delete_document_by_filter() { }, "error": { "message": "Attribute `catto` is not filterable. 
Available filterable attributes are: `doggo`.\n1:6 catto = jorts", - "code": "invalid_search_filter", + "code": "invalid_document_filter", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" }, "duration": "[duration]", "enqueuedAt": "[date]", @@ -748,4 +760,27 @@ async fn fetch_document_by_filter() { "link": "https://docs.meilisearch.com/errors#invalid_document_filter" } "###); + + let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", + "code": "invalid_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" + } + "###); + + let (response, code) = + index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese", + "code": "invalid_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" + } + "###); } diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index a9a2969bb..f314e8800 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -946,7 +946,7 @@ async fn sort_unset_ranking_rule() { index.wait_task(1).await; let expected_response = json!({ - "message": "The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.", + "message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" diff --git a/meilisearch/tests/tasks/errors.rs b/meilisearch/tests/tasks/errors.rs index 065ff1aa9..830c4c8e7 100644 --- a/meilisearch/tests/tasks/errors.rs +++ b/meilisearch/tests/tasks/errors.rs @@ -97,7 +97,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -108,7 +108,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. 
Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" @@ -119,7 +119,7 @@ async fn task_bad_types() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", + "message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.", "code": "invalid_task_types", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_task_types" diff --git a/meilisearch/tests/tasks/mod.rs b/meilisearch/tests/tasks/mod.rs index e9b5a2325..4ac134871 100644 --- a/meilisearch/tests/tasks/mod.rs +++ b/meilisearch/tests/tasks/mod.rs @@ -413,7 +413,7 @@ async fn test_summarized_document_addition_or_update() { } #[actix_web::test] -async fn test_summarized_delete_batch() { +async fn test_summarized_delete_documents_by_batch() { let server = Server::new().await; let index = server.index("test"); index.delete_batch(vec![1, 2, 3]).await; @@ -430,7 +430,8 @@ async fn test_summarized_delete_batch() { "canceledBy": null, "details": { "providedIds": 3, - "deletedDocuments": 0 + "deletedDocuments": 0, + "originalFilter": null }, "error": { "message": "Index `test` not found.", @@ -460,7 +461,8 @@ async fn test_summarized_delete_batch() { "canceledBy": null, "details": { "providedIds": 1, - "deletedDocuments": 0 + "deletedDocuments": 0, + "originalFilter": null }, "error": null, "duration": "[duration]", @@ -472,7 +474,100 @@ async fn test_summarized_delete_batch() { } #[actix_web::test] -async fn test_summarized_delete_document() { +async fn test_summarized_delete_documents_by_filter() { + let server = Server::new().await; + let index = server.index("test"); + + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + 
"startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.create(None).await; + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": { + "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese", + "code": "invalid_document_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_filter" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.update_settings(json!({ "filterableAttributes": ["doggo"] })).await; + index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await; + index.wait_task(4).await; + let (task, _) = index.get_task(4).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 4, + "indexUid": "test", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 0, + "deletedDocuments": 0, + "originalFilter": "\"doggo = bernese\"" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_delete_document_by_id() { let server = Server::new().await; let index = server.index("test"); index.delete_document(1).await; @@ -489,7 +584,8 @@ async fn test_summarized_delete_document() { "canceledBy": null, "details": { "providedIds": 1, - "deletedDocuments": 0 + "deletedDocuments": 0, + "originalFilter": null }, "error": { "message": "Index `test` not found.", @@ -519,7 +615,8 @@ async fn test_summarized_delete_document() { "canceledBy": null, "details": { "providedIds": 1, - "deletedDocuments": 0 + "deletedDocuments": 0, + "originalFilter": null }, "error": null, "duration": "[duration]", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index de0f4e31d..138103723 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -25,8 +25,13 @@ flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" -grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] } -heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] } +grenad = { version = "0.4.4", default-features = false, features = [ + "tempfile", +] } +heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.6", default-features = false, features = [ + "lmdb", + "sync-read-txn", +] } json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } memmap2 = "0.5.10" @@ -39,12 +44,17 @@ rstar = { version = "0.10.0", features = ["serde"] } serde = { version = "1.0.160", features = ["derive"] } serde_json = { version = "1.0.95", features = ["preserve_order"] } slice-group-by = "0.3.0" -smallstr = { version = "0.3.0", features = ["serde"] } 
+smallstr = { version = "0.3.0", features = ["serde"] }
 smallvec = "1.10.0"
 smartstring = "1.0.1"
 tempfile = "3.5.0"
 thiserror = "1.0.40"
-time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+time = { version = "0.3.20", features = [
+    "serde-well-known",
+    "formatting",
+    "parsing",
+    "macros",
+] }
 uuid = { version = "1.3.1", features = ["v4"] }

 filter-parser = { path = "../filter-parser" }
@@ -63,13 +73,10 @@ big_s = "1.0.2"
 insta = "1.29.0"
 maplit = "1.0.2"
 md5 = "0.7.0"
-rand = {version = "0.8.5", features = ["small_rng"] }
-
-[target.'cfg(fuzzing)'.dev-dependencies]
-fuzzcheck = "0.12.1"
+rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
-all-tokenizations = [ "charabia/default" ]
+all-tokenizations = ["charabia/default"]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml
diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs
index 43b31187d..7c037b3bf 100644
--- a/milli/src/documents/mod.rs
+++ b/milli/src/documents/mod.rs
@@ -111,7 +111,6 @@ pub enum Error {
     Io(#[from] io::Error),
 }

-#[cfg(test)]
 pub fn objects_from_json_value(json: serde_json::Value) -> Vec<Object> {
     let documents = match json {
         object @ serde_json::Value::Object(_) => vec![object],
@@ -141,7 +140,6 @@ macro_rules! documents {
     }};
 }

-#[cfg(test)]
 pub fn documents_batch_reader_from_objects(
     objects: impl IntoIterator<Item = Object>,
 ) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
diff --git a/milli/src/error.rs b/milli/src/error.rs
index 7f0faf2fd..8d55eabbd 100644
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -126,7 +126,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
     InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
     #[error("{}", HeedError::BadOpenOptions)]
     InvalidLmdbOpenOptions,
-    #[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
+    #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
     SortRankingRuleMissing,
     #[error("The database file is in an invalid state.")]
     InvalidStoreFile,
diff --git a/milli/src/external_documents_ids.rs b/milli/src/external_documents_ids.rs
index 2cecd1abe..36b147336 100644
--- a/milli/src/external_documents_ids.rs
+++ b/milli/src/external_documents_ids.rs
@@ -106,22 +106,30 @@ impl<'a> ExternalDocumentsIds<'a> {
         map
     }

+    /// Return an fst of the combined hard and soft deleted IDs.
+    pub fn to_fst<'b>(&'b self) -> fst::Result<Cow<'b, fst::Map<Cow<'a, [u8]>>>> {
+        if self.soft.is_empty() {
+            return Ok(Cow::Borrowed(&self.hard));
+        }
+        let union_op = self.hard.op().add(&self.soft).r#union();
+
+        let mut iter = union_op.into_stream();
+        let mut new_hard_builder = fst::MapBuilder::memory();
+        while let Some((external_id, marked_docids)) = iter.next() {
+            let value = indexed_last_value(marked_docids).unwrap();
+            if value != DELETED_ID {
+                new_hard_builder.insert(external_id, value)?;
+            }
+        }
+
+        drop(iter);
+
+        Ok(Cow::Owned(new_hard_builder.into_map().map_data(Cow::Owned)?))
+    }
+
     fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
         if self.soft.len() >= self.hard.len() / 2 {
-            let union_op = self.hard.op().add(&self.soft).r#union();
-
-            let mut iter = union_op.into_stream();
-            let mut new_hard_builder = fst::MapBuilder::memory();
-            while let Some((external_id, marked_docids)) = iter.next() {
-                let value = indexed_last_value(marked_docids).unwrap();
-                if value != DELETED_ID {
-                    new_hard_builder.insert(external_id, value)?;
-                }
-            }
-
-            drop(iter);
-
-            self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
+            self.hard = self.to_fst()?.into_owned();
             self.soft = fst::Map::default().map_data(Cow::Owned)?;
         }
diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
index 1bd132974..01ce523ba 100644
--- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
@@ -49,7 +49,7 @@ impl CboRoaringBitmapCodec {
         } else {
             // Otherwise, it means we used the classic RoaringBitmapCodec and
             // that the header takes threshold integers.
-            RoaringBitmap::deserialize_from(bytes)
+            RoaringBitmap::deserialize_unchecked_from(bytes)
         }
     }

@@ -69,7 +69,7 @@ impl CboRoaringBitmapCodec {
                     vec.push(integer);
                 }
             } else {
-                roaring |= RoaringBitmap::deserialize_from(bytes.as_ref())?;
+                roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
             }
         }

diff --git a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
index 8fae9b8fd..6cec0eb44 100644
--- a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
@@ -8,7 +8,7 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
     type DItem = RoaringBitmap;

     fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
-        RoaringBitmap::deserialize_from(bytes).ok()
+        RoaringBitmap::deserialize_unchecked_from(bytes).ok()
     }
 }
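Both codecs switch to `deserialize_unchecked_from`, which skips the validation work `deserialize_from` performs; that is sound here because the bytes were produced by the engine itself and is noticeably faster on hot read paths. A minimal sketch with the `roaring` crate:

use roaring::RoaringBitmap;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let bitmap: RoaringBitmap = (0..1_000).collect();
    let mut bytes = Vec::new();
    bitmap.serialize_into(&mut bytes)?;

    // Trusted, self-produced bytes: decoding can skip the checks.
    let decoded = RoaringBitmap::deserialize_unchecked_from(&bytes[..])?;
    assert_eq!(decoded, bitmap);
    Ok(())
}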
diff --git a/milli/src/index.rs b/milli/src/index.rs
index ad53e79ea..1ccef13dd 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -21,10 +21,9 @@ use crate::heed_codec::facet::{
 };
 use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
 use crate::{
-    default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
-    DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
-    FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
-    Search, U8StrStrCodec, BEU16, BEU32,
+    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
+    FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
+    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
 };

 pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -111,9 +110,6 @@ pub struct Index {
     /// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
     pub exact_word_prefix_docids: Database<Str, RoaringBitmapCodec>,

-    /// Maps a word and a document id (u32) to all the positions where the given word appears.
-    pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
-
     /// Maps the proximity between a pair of words with all the docids where this relation appears.
     pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
     /// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
@@ -170,33 +166,45 @@ impl Index {
         unsafe { options.flag(Flags::MdbAlwaysFreePages) };
         let env = options.open(path)?;
-        let main = env.create_poly_database(Some(MAIN))?;
-        let word_docids = env.create_database(Some(WORD_DOCIDS))?;
-        let exact_word_docids = env.create_database(Some(EXACT_WORD_DOCIDS))?;
-        let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?;
-        let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?;
-        let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
-        let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
-        let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?;
+        let mut wtxn = env.write_txn()?;
+        let main = env.create_poly_database(&mut wtxn, Some(MAIN))?;
+        let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
+        let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?;
+        let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
+        let exact_word_prefix_docids =
+            env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
+        let word_pair_proximity_docids =
+            env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
+        let script_language_docids =
+            env.create_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?;
         let word_prefix_pair_proximity_docids =
-            env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
+            env.create_database(&mut wtxn, Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
         let prefix_word_pair_proximity_docids =
-            env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
-        let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?;
-        let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?;
-        let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
-        let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
-        let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
-        let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
-        let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
-        let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
-        let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
-        let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;
+            env.create_database(&mut wtxn, Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
+        let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?;
+        let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?;
+        let field_id_word_count_docids =
+            env.create_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
+        let word_prefix_position_docids =
+            env.create_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?;
+        let word_prefix_fid_docids =
+            env.create_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
+
let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?; + let facet_id_string_docids = + env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?; + let facet_id_exists_docids = + env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?; + let facet_id_is_null_docids = + env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?; + let facet_id_is_empty_docids = + env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?; - let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?; + let field_id_docid_facet_f64s = + env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?; let field_id_docid_facet_strings = - env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?; - let documents = env.create_database(Some(DOCUMENTS))?; + env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?; + let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?; + wtxn.commit()?; Index::set_creation_dates(&env, main, created_at, updated_at)?; @@ -207,7 +215,6 @@ impl Index { exact_word_docids, word_prefix_docids, exact_word_prefix_docids, - docid_word_positions, word_pair_proximity_docids, script_language_docids, word_prefix_pair_proximity_docids, @@ -1459,9 +1466,9 @@ pub(crate) mod tests { db_snap!(index, field_distribution, @r###" - age 1 - id 2 - name 2 + age 1 | + id 2 | + name 2 | "### ); @@ -1479,9 +1486,9 @@ pub(crate) mod tests { db_snap!(index, field_distribution, @r###" - age 1 - id 2 - name 2 + age 1 | + id 2 | + name 2 | "### ); @@ -1495,9 +1502,9 @@ pub(crate) mod tests { db_snap!(index, field_distribution, @r###" - has_dog 1 - id 2 - name 2 + has_dog 1 | + id 2 | + name 2 | "### ); } diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 48699e76f..e7acdde2c 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -5,52 +5,6 @@ #[global_allocator] pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; -// #[cfg(test)] -// pub mod allocator { -// use std::alloc::{GlobalAlloc, System}; -// use std::sync::atomic::{self, AtomicI64}; - -// #[global_allocator] -// pub static ALLOC: CountingAlloc = CountingAlloc { -// max_resident: AtomicI64::new(0), -// resident: AtomicI64::new(0), -// allocated: AtomicI64::new(0), -// }; - -// pub struct CountingAlloc { -// pub max_resident: AtomicI64, -// pub resident: AtomicI64, -// pub allocated: AtomicI64, -// } -// unsafe impl GlobalAlloc for CountingAlloc { -// unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 { -// self.allocated.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst); -// let old_resident = -// self.resident.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst); - -// let resident = old_resident + layout.size() as i64; -// self.max_resident.fetch_max(resident, atomic::Ordering::SeqCst); - -// // if layout.size() > 1_000_000 { -// // eprintln!( -// // "allocating {} with new resident size: {resident}", -// // layout.size() / 1_000_000 -// // ); -// // // let trace = std::backtrace::Backtrace::capture(); -// // // let t = trace.to_string(); -// // // eprintln!("{t}"); -// // } - -// System.alloc(layout) -// } - -// unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) { -// self.resident.fetch_sub(layout.size() as i64, atomic::Ordering::Relaxed); -// System.dealloc(ptr, layout) -// } -// } -// } - #[macro_use] pub mod documents; diff --git a/milli/src/search/new/distinct.rs b/milli/src/search/new/distinct.rs index fbb7550a9..fff96bd5d 100644 --- a/milli/src/search/new/distinct.rs +++ 
b/milli/src/search/new/distinct.rs
@@ -26,7 +26,6 @@ pub fn apply_distinct_rule(
     ctx: &mut SearchContext,
     field_id: u16,
     candidates: &RoaringBitmap,
-    // TODO: add a universe here, such that the `excluded` are a subset of the universe?
 ) -> Result<DistinctOutput> {
     let mut excluded = RoaringBitmap::new();
     let mut remaining = RoaringBitmap::new();
diff --git a/milli/src/search/new/exact_attribute.rs b/milli/src/search/new/exact_attribute.rs
index dc9c95d3d..6e0381295 100644
--- a/milli/src/search/new/exact_attribute.rs
+++ b/milli/src/search/new/exact_attribute.rs
@@ -206,7 +206,7 @@ impl State {
             )?;
             intersection &= &candidates;
             if !intersection.is_empty() {
-                // TODO: although not really worth it in terms of performance,
+                // Although not really worth it in terms of performance,
                 // it would be good to put this in cache for the sake of consistency
                 let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize {
                     ctx.index
diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs
index d8f6836e7..dd25ddd4a 100644
--- a/milli/src/search/new/graph_based_ranking_rule.rs
+++ b/milli/src/search/new/graph_based_ranking_rule.rs
@@ -46,7 +46,7 @@ use super::logger::SearchLogger;
 use super::query_graph::QueryNode;
 use super::ranking_rule_graph::{
     ConditionDocIdsCache, DeadEndsCache, ExactnessGraph, FidGraph, PositionGraph, ProximityGraph,
-    RankingRuleGraph, RankingRuleGraphTrait, TypoGraph,
+    RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, WordsGraph,
 };
 use super::small_bitmap::SmallBitmap;
 use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
@@ -54,6 +54,12 @@ use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::ranking_rule_graph::PathVisitor;
 use crate::{Result, TermsMatchingStrategy};

+pub type Words = GraphBasedRankingRule<WordsGraph>;
+impl GraphBasedRankingRule<WordsGraph> {
+    pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
+        Self::new_with_id("words".to_owned(), Some(terms_matching_strategy))
+    }
+}
 pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
 impl GraphBasedRankingRule<ProximityGraph> {
     pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
diff --git a/milli/src/search/new/interner.rs b/milli/src/search/new/interner.rs
index ebf18f38c..c2d325a86 100644
--- a/milli/src/search/new/interner.rs
+++ b/milli/src/search/new/interner.rs
@@ -32,7 +32,7 @@ impl<T> Interned<T> {
 #[derive(Clone)]
 pub struct DedupInterner<T> {
     stable_store: Vec<T>,
-    lookup: FxHashMap<T, Interned<T>>, // TODO: Arc<T>?
+    lookup: FxHashMap<T, Interned<T>>,
 }
 impl<T> Default for DedupInterner<T> {
     fn default() -> Self {
diff --git a/milli/src/search/new/limits.rs b/milli/src/search/new/limits.rs
index 33a5a4a6c..d08946424 100644
--- a/milli/src/search/new/limits.rs
+++ b/milli/src/search/new/limits.rs
@@ -1,5 +1,4 @@
 /// Maximum number of tokens we consider in a single search.
-// TODO: Loic, find proper value here so we don't overflow the interner.
 pub const MAX_TOKEN_COUNT: usize = 1_000;

 /// Maximum number of prefixes that can be derived from a single word.
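With this change, `words` stops being a bespoke ranking rule and becomes the generic graph-based rule specialized by a zero-sized marker type, like `proximity` and `typo` before it. A standalone sketch of that pattern; the types below are illustrative only, not the milli definitions.

use std::marker::PhantomData;

trait RankingRuleGraphTrait {
    const ID: &'static str;
}

struct WordsGraph;
impl RankingRuleGraphTrait for WordsGraph {
    const ID: &'static str = "words";
}

struct ProximityGraph;
impl RankingRuleGraphTrait for ProximityGraph {
    const ID: &'static str = "proximity";
}

// One generic rule, specialized per graph via a zero-sized marker.
struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
    id: String,
    _graph: PhantomData<G>,
}

impl<G: RankingRuleGraphTrait> GraphBasedRankingRule<G> {
    fn new_with_id(id: String) -> Self {
        Self { id, _graph: PhantomData }
    }
}

type Words = GraphBasedRankingRule<WordsGraph>;
type Proximity = GraphBasedRankingRule<ProximityGraph>;

fn main() {
    let words = Words::new_with_id(WordsGraph::ID.to_owned());
    let proximity = Proximity::new_with_id(ProximityGraph::ID.to_owned());
    assert_eq!((words.id.as_str(), proximity.id.as_str()), ("words", "proximity"));
}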
diff --git a/milli/src/search/new/logger/visual.rs b/milli/src/search/new/logger/visual.rs
index 1cbe007d3..8df56da89 100644
--- a/milli/src/search/new/logger/visual.rs
+++ b/milli/src/search/new/logger/visual.rs
@@ -4,7 +4,6 @@ use std::io::{BufWriter, Write};
 use std::path::{Path, PathBuf};
 use std::time::Instant;

-// use rand::random;
 use roaring::RoaringBitmap;

 use crate::search::new::interner::Interned;
@@ -13,6 +12,7 @@ use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::ranking_rule_graph::{
     Edge, FidCondition, FidGraph, PositionCondition, PositionGraph, ProximityCondition,
     ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
+    WordsCondition, WordsGraph,
 };
 use crate::search::new::ranking_rules::BoxRankingRule;
 use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
@@ -24,11 +24,12 @@ pub enum SearchEvents {
     RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 },
     RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 },
     ExtendResults { new: Vec<u32> },
-    WordsGraph { query_graph: QueryGraph },
     ProximityGraph { graph: RankingRuleGraph<ProximityGraph> },
     ProximityPaths { paths: Vec<Vec<Interned<ProximityCondition>>> },
     TypoGraph { graph: RankingRuleGraph<TypoGraph> },
     TypoPaths { paths: Vec<Vec<Interned<TypoCondition>>> },
+    WordsGraph { graph: RankingRuleGraph<WordsGraph> },
+    WordsPaths { paths: Vec<Vec<Interned<WordsCondition>>> },
     FidGraph { graph: RankingRuleGraph<FidGraph> },
     FidPaths { paths: Vec<Vec<Interned<FidCondition>>> },
     PositionGraph { graph: RankingRuleGraph<PositionGraph> },
@@ -139,8 +140,11 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
         let Some(location) = self.location.last() else { return };
         match location {
             Location::Words => {
-                if let Some(query_graph) = state.downcast_ref::<QueryGraph>() {
-                    self.events.push(SearchEvents::WordsGraph { query_graph: query_graph.clone() });
+                if let Some(graph) = state.downcast_ref::<RankingRuleGraph<WordsGraph>>() {
+                    self.events.push(SearchEvents::WordsGraph { graph: graph.clone() });
+                }
+                if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<WordsCondition>>>>() {
+                    self.events.push(SearchEvents::WordsPaths { paths: paths.clone() });
                 }
             }
             Location::Typo => {
@@ -329,7 +333,6 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
             SearchEvents::ExtendResults { new } => {
                 self.write_extend_results(new)?;
             }
-            SearchEvents::WordsGraph { query_graph } => self.write_words_graph(query_graph)?,
             SearchEvents::ProximityGraph { graph } => self.write_rr_graph(&graph)?,
             SearchEvents::ProximityPaths { paths } => {
                 self.write_rr_graph_paths::<ProximityGraph>(paths)?;
@@ -338,6 +341,10 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
             SearchEvents::TypoPaths { paths } => {
                 self.write_rr_graph_paths::<TypoGraph>(paths)?;
             }
+            SearchEvents::WordsGraph { graph } => self.write_rr_graph(&graph)?,
+            SearchEvents::WordsPaths { paths } => {
+                self.write_rr_graph_paths::<WordsGraph>(paths)?;
+            }
             SearchEvents::FidGraph { graph } => self.write_rr_graph(&graph)?,
             SearchEvents::FidPaths { paths } => {
                 self.write_rr_graph_paths::<FidGraph>(paths)?;
@@ -455,7 +462,7 @@ fill: \"#B6E2D3\"
 shape: class
 max_nbr_typo: {}",
             term_subset.description(ctx),
-            term_subset.max_nbr_typos(ctx)
+            term_subset.max_typo_cost(ctx)
         )?;

         for w in term_subset.all_single_words_except_prefix_db(ctx)? {
@@ -482,13 +489,6 @@
         }
         Ok(())
     }
-    fn write_words_graph(&mut self, qg: QueryGraph) -> Result<()> {
-        self.make_new_file_for_internal_state_if_needed()?;
-
-        self.write_query_graph(&qg)?;
-
-        Ok(())
-    }
     fn write_rr_graph<R: RankingRuleGraphTrait>(
         &mut self,
         graph: &RankingRuleGraph<R>,
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs
index 7e8426bf9..a28f42f35 100644
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -15,11 +15,7 @@ mod resolve_query_graph;
 mod small_bitmap;

 mod exact_attribute;
-// TODO: documentation + comments
-// implementation is currently an adaptation of the previous implementation to fit with the new model
 mod sort;
-// TODO: documentation + comments
-mod words;

 #[cfg(test)]
 mod tests;
@@ -43,10 +39,10 @@ use ranking_rules::{
 use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
 use roaring::RoaringBitmap;
 use sort::Sort;
-use words::Words;

 use self::geo_sort::GeoSort;
 pub use self::geo_sort::Strategy as GeoSortStrategy;
+use self::graph_based_ranking_rule::Words;
 use self::interner::Interned;
 use crate::search::new::distinct::apply_distinct_rule;
 use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
@@ -202,6 +198,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
     let mut sorted_fields = HashSet::new();
     let mut geo_sorted = false;

+    // Don't add the `words` ranking rule if the term matching strategy is `All`
+    if matches!(terms_matching_strategy, TermsMatchingStrategy::All) {
+        words = true;
+    }
+
     let mut ranking_rules: Vec<BoxRankingRule<'ctx, QueryGraph>> = vec![];
     let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
     for rr in settings_ranking_rules {
@@ -397,8 +398,8 @@ pub fn execute_search(
         None
     };
     let bucket_sort_output = if let Some(query_terms) = query_terms {
-        let graph = QueryGraph::from_query(ctx, &query_terms)?;
-        located_query_terms = Some(query_terms);
+        let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
+        located_query_terms = Some(new_located_query_terms);

         let ranking_rules = get_ranking_rules_for_query_graph_search(
             ctx,
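The new return value means callers get the located query terms including the ngrams created while building the graph, instead of re-deriving them afterwards. A simplified, runnable sketch of that contract, with stand-in types:

#[derive(Clone, Debug, PartialEq)]
struct LocatedQueryTerm {
    text: String,
    positions: std::ops::RangeInclusive<u8>,
}

struct QueryGraph {
    node_count: usize,
}

fn from_query(terms: &[LocatedQueryTerm]) -> (QueryGraph, Vec<LocatedQueryTerm>) {
    let mut new_located_query_terms = terms.to_vec();
    // one bigram per adjacent pair, mirroring what make_ngram produces
    for pair in terms.windows(2) {
        new_located_query_terms.push(LocatedQueryTerm {
            text: format!("{}{}", pair[0].text, pair[1].text),
            positions: *pair[0].positions.start()..=*pair[1].positions.end(),
        });
    }
    // start + end nodes plus one node per term (original and ngram)
    (QueryGraph { node_count: new_located_query_terms.len() + 2 }, new_located_query_terms)
}

fn main() {
    let terms = vec![
        LocatedQueryTerm { text: "new".into(), positions: 0..=0 },
        LocatedQueryTerm { text: "york".into(), positions: 1..=1 },
    ];
    let (graph, all_terms) = from_query(&terms);
    assert_eq!(all_terms.last().unwrap().text, "newyork");
    assert_eq!(graph.node_count, 5);
}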
ngrams let (mut prev2, mut prev1, mut prev0): (Vec, Vec, Vec) = (vec![], vec![], vec![root_node]); let original_terms_len = terms.len(); for term_idx in 0..original_terms_len { let mut new_nodes = vec![]; + let new_node_idx = add_node( &mut nodes_data, QueryNodeData::Term(LocatedQueryTermSubset { - term_subset: QueryTermSubset::full(Interned::from_raw(term_idx as u16)), + term_subset: QueryTermSubset::full(terms[term_idx].value), positions: terms[term_idx].positions.clone(), term_ids: term_idx as u8..=term_idx as u8, }), @@ -121,6 +125,7 @@ impl QueryGraph { if let Some(ngram) = query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)? { + new_located_query_terms.push(ngram.clone()); let ngram_idx = add_node( &mut nodes_data, QueryNodeData::Term(LocatedQueryTermSubset { @@ -136,6 +141,7 @@ impl QueryGraph { if let Some(ngram) = query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)? { + new_located_query_terms.push(ngram.clone()); let ngram_idx = add_node( &mut nodes_data, QueryNodeData::Term(LocatedQueryTermSubset { @@ -167,7 +173,7 @@ impl QueryGraph { let mut graph = QueryGraph { root_node, end_node, nodes }; graph.build_initial_edges(); - Ok(graph) + Ok((graph, new_located_query_terms)) } /// Remove the given nodes, connecting all their predecessors to all their successors. diff --git a/milli/src/search/new/query_term/compute_derivations.rs b/milli/src/search/new/query_term/compute_derivations.rs index c26c4bc6b..d5dfbbcd0 100644 --- a/milli/src/search/new/query_term/compute_derivations.rs +++ b/milli/src/search/new/query_term/compute_derivations.rs @@ -28,14 +28,14 @@ pub enum ZeroOrOneTypo { impl Interned { pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> { let s = ctx.term_interner.get_mut(self); - if s.max_nbr_typos <= 1 && s.one_typo.is_uninit() { + if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() { assert!(s.two_typo.is_uninit()); // Initialize one_typo subterm even if max_nbr_typo is 0 because of split words self.initialize_one_typo_subterm(ctx)?; let s = ctx.term_interner.get_mut(self); assert!(s.one_typo.is_init()); s.two_typo = Lazy::Init(TwoTypoTerm::default()); - } else if s.max_nbr_typos > 1 && s.two_typo.is_uninit() { + } else if s.max_levenshtein_distance > 1 && s.two_typo.is_uninit() { assert!(s.two_typo.is_uninit()); self.initialize_one_and_two_typo_subterm(ctx)?; let s = ctx.term_interner.get_mut(self); @@ -185,7 +185,7 @@ pub fn partially_initialized_term_from_word( original: ctx.word_interner.insert(word.to_owned()), ngram_words: None, is_prefix: false, - max_nbr_typos: 0, + max_levenshtein_distance: 0, zero_typo: <_>::default(), one_typo: Lazy::Init(<_>::default()), two_typo: Lazy::Init(<_>::default()), @@ -256,7 +256,7 @@ pub fn partially_initialized_term_from_word( Ok(QueryTerm { original: word_interned, ngram_words: None, - max_nbr_typos: max_typo, + max_levenshtein_distance: max_typo, is_prefix, zero_typo, one_typo: Lazy::Uninit, @@ -275,7 +275,16 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result { fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { let self_mut = ctx.term_interner.get_mut(self); - let QueryTerm { original, is_prefix, one_typo, max_nbr_typos, .. } = self_mut; + + let allows_split_words = self_mut.allows_split_words(); + let QueryTerm { + original, + is_prefix, + one_typo, + max_levenshtein_distance: max_nbr_typos, + .. 
+ } = self_mut; + let original = *original; let is_prefix = *is_prefix; // let original_str = ctx.word_interner.get(*original).to_owned(); @@ -300,13 +309,17 @@ impl Interned { })?; } - let original_str = ctx.word_interner.get(original).to_owned(); - let split_words = find_split_words(ctx, original_str.as_str())?; + let split_words = if allows_split_words { + let original_str = ctx.word_interner.get(original).to_owned(); + find_split_words(ctx, original_str.as_str())? + } else { + None + }; let self_mut = ctx.term_interner.get_mut(self); // Only add the split words to the derivations if: - // 1. the term is not an ngram; OR + // 1. the term is neither an ngram nor a phrase; OR // 2. the term is an ngram, but the split words are different from the ngram's component words let split_words = if let Some((ngram_words, split_words)) = self_mut.ngram_words.as_ref().zip(split_words.as_ref()) @@ -328,7 +341,13 @@ impl Interned { } fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> { let self_mut = ctx.term_interner.get_mut(self); - let QueryTerm { original, is_prefix, two_typo, max_nbr_typos, .. } = self_mut; + let QueryTerm { + original, + is_prefix, + two_typo, + max_levenshtein_distance: max_nbr_typos, + .. + } = self_mut; let original_str = ctx.word_interner.get(*original).to_owned(); if two_typo.is_init() { return Ok(()); diff --git a/milli/src/search/new/query_term/mod.rs b/milli/src/search/new/query_term/mod.rs index bf521d9b2..8db843037 100644 --- a/milli/src/search/new/query_term/mod.rs +++ b/milli/src/search/new/query_term/mod.rs @@ -43,7 +43,7 @@ pub struct QueryTermSubset { pub struct QueryTerm { original: Interned, ngram_words: Option>>, - max_nbr_typos: u8, + max_levenshtein_distance: u8, is_prefix: bool, zero_typo: ZeroTypoTerm, // May not be computed yet @@ -132,7 +132,6 @@ impl QueryTermSubset { if full_query_term.ngram_words.is_some() { return None; } - // TODO: included in subset if let Some(phrase) = full_query_term.zero_typo.phrase { self.zero_typo_subset.contains_phrase(phrase).then_some(ExactTerm::Phrase(phrase)) } else if let Some(word) = full_query_term.zero_typo.exact { @@ -182,7 +181,6 @@ impl QueryTermSubset { let word = match &self.zero_typo_subset { NTypoTermSubset::All => Some(use_prefix_db), NTypoTermSubset::Subset { words, phrases: _ } => { - // TODO: use a subset of prefix words instead if words.contains(&use_prefix_db) { Some(use_prefix_db) } else { @@ -204,7 +202,6 @@ impl QueryTermSubset { ctx: &mut SearchContext, ) -> Result> { let mut result = BTreeSet::default(); - // TODO: a compute_partially funtion if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() { self.original.compute_fully_if_needed(ctx)?; } @@ -300,7 +297,6 @@ impl QueryTermSubset { let mut result = BTreeSet::default(); if !self.one_typo_subset.is_empty() { - // TODO: compute less than fully if possible self.original.compute_fully_if_needed(ctx)?; } let original = ctx.term_interner.get_mut(self.original); @@ -342,10 +338,16 @@ impl QueryTermSubset { } None } - pub fn max_nbr_typos(&self, ctx: &SearchContext) -> u8 { + pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 { let t = ctx.term_interner.get(self.original); - match t.max_nbr_typos { - 0 => 0, + match t.max_levenshtein_distance { + 0 => { + if t.allows_split_words() { + 1 + } else { + 0 + } + } 1 => { if self.one_typo_subset.is_empty() { 0 @@ -438,6 +440,9 @@ impl QueryTerm { self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty() } + fn allows_split_words(&self) -> bool 
{ + self.zero_typo.phrase.is_none() + } } impl Interned<QueryTerm> { diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index dc317a0fb..5e97d6578 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -77,13 +77,9 @@ pub fn located_query_terms_from_tokens( } } TokenKind::Separator(separator_kind) => { - match separator_kind { - SeparatorKind::Hard => { - position += 1; - } - SeparatorKind::Soft => { - position += 0; - } + // add a penalty for hard separators + if let SeparatorKind::Hard = separator_kind { + position = position.wrapping_add(7); } phrase = 'phrase: { @@ -143,7 +139,6 @@ pub fn number_of_typos_allowed<'ctx>( let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?; let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?; - // TODO: should `exact_words` also disable prefix search, ngrams, split words, or synonyms? let exact_words = ctx.index.exact_words(ctx.txn)?; Ok(Box::new(move |word: &str| { @@ -217,7 +212,7 @@ pub fn make_ngram( original: ngram_str_interned, ngram_words: Some(words_interned), is_prefix, - max_nbr_typos, + max_levenshtein_distance: max_nbr_typos, zero_typo: term.zero_typo, one_typo: Lazy::Uninit, two_typo: Lazy::Uninit, @@ -254,8 +249,6 @@ impl PhraseBuilder { } else { // token has kind Word let word = ctx.word_interner.insert(token.lemma().to_string()); - // TODO: in a phrase, check that every word exists - // otherwise return an empty term self.words.push(Some(word)); } } @@ -271,7 +264,7 @@ impl PhraseBuilder { QueryTerm { original: ctx.word_interner.insert(phrase_desc), ngram_words: None, - max_nbr_typos: 0, + max_levenshtein_distance: 0, is_prefix: false, zero_typo: ZeroTypoTerm { phrase: Some(phrase), @@ -288,3 +281,36 @@ impl PhraseBuilder { }) } } + +#[cfg(test)] +mod tests { + use charabia::TokenizerBuilder; + + use super::*; + use crate::index::tests::TempIndex; + + fn temp_index_with_documents() -> TempIndex { + let temp_index = TempIndex::new(); + temp_index + .add_documents(documents!([ + { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" }, + { "id": 2, "name": "Westfália" }, + { "id": 3, "name": "Ŵôřlḑôle" }, + ])) + .unwrap(); + temp_index + } + + #[test] + fn start_with_hard_separator() -> Result<()> { + let tokenizer = TokenizerBuilder::new().build(); + let tokens = tokenizer.tokenize("."); + let index = temp_index_with_documents(); + let rtxn = index.read_txn()?; + let mut ctx = SearchContext::new(&index, &rtxn); + // this used to panic with `attempt to add with overflow` + let located_query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?; + assert!(located_query_terms.is_empty()); + Ok(()) + } +} diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index c065cc706..e93a91d29 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -1,5 +1,48 @@ -#![allow(clippy::too_many_arguments)] +/** Implements a "PathVisitor" which finds all paths of a certain cost +from the START to END node of a ranking rule graph. +A path is a list of conditions. A condition is the data associated with +an edge, given by the ranking rule. Some edges don't have a condition associated +with them, they are "unconditional". These kinds of edges are used to "skip" a node. + +The algorithm uses a depth-first search. 
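+
+As a rough illustration (a hypothetical, simplified sketch in plain Rust: `edges_of`,
+the integer node ids and the `u64` condition ids are invented stand-ins for the real
+interned types, not the actual milli API), the core traversal looks like this:
+
+    fn all_paths_of_cost(
+        edges_of: &dyn Fn(usize) -> Vec<(usize, u64, Option<u64>)>, // (dest, cost, condition)
+        end_node: usize,
+        node: usize,
+        budget: u64,
+        path: &mut Vec<u64>,
+        visit: &mut dyn FnMut(&[u64]),
+    ) {
+        if node == end_node {
+            // keep only the paths whose total cost matches the target exactly
+            if budget == 0 { visit(path); }
+            return;
+        }
+        for (dest, cost, condition) in edges_of(node) {
+            if cost > budget { continue; } // prune: this edge alone busts the budget
+            if let Some(c) = condition { path.push(c); } // conditional edge: record it
+            all_paths_of_cost(edges_of, end_node, dest, budget - cost, path, visit);
+            if condition.is_some() { path.pop(); } // backtrack
+        }
+    }
+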
It benefits from two main optimisations: +- The list of all possible costs to go from any node to the END node is precomputed +- The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges +untraversable depending on what other edges were selected. + +These two optimisations are meant to avoid traversing edges that wouldn't lead +to a valid path. In practically all cases, we avoid the exponential complexity +that is inherent to depth-first search in a large ranking rule graph. + +The DeadEndsCache is a sort of prefix tree which associates a list of forbidden +conditions to a list of traversed conditions. +For example, the DeadEndsCache could say the following: +- Immediately, from the start, the conditions `[a,b]` are forbidden + - if we take the condition `c`, then the conditions `[e]` are also forbidden + - and if after that, we take `f`, then `[h,i]` are also forbidden + - etc. + - if we take `g`, then `[f]` is also forbidden + - etc. + - etc. +As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden +conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden. + +When a path is found from START to END, we give it to the `visit` closure. +This closure takes a mutable reference to the `DeadEndsCache`. This means that +the caller can update this cache. Therefore, we must handle the case where the +DeadEndsCache has been updated. This means potentially backtracking up to the point +where the traversed conditions are all allowed by the new DeadEndsCache. + +The algorithm also implements the `TermsMatchingStrategy` logic. +Some edges are augmented with a list of "nodes_to_skip". Skipping +a node means "reaching this node through an unconditional edge". If we have +already traversed (ie. not skipped) a node that is in this list, then we know that we +can't traverse this edge. Otherwise, we traverse the edge but make sure to skip any +future node that was present in the "nodes_to_skip" list. + +The caller can decide to stop the path finding algorithm +by returning a `ControlFlow::Break` from the `visit` closure. +*/ use std::collections::{BTreeSet, VecDeque}; use std::iter::FromIterator; use std::ops::ControlFlow; @@ -12,30 +55,41 @@ use crate::search::new::query_graph::QueryNode; use crate::search::new::small_bitmap::SmallBitmap; use crate::Result; +/// Closure which processes a path found by the `PathVisitor` type VisitFn<'f, G> = &'f mut dyn FnMut( + // the path as a list of conditions &[Interned<::Condition>], &mut RankingRuleGraph, + // a mutable reference to the DeadEndsCache, to update it in case the given + // path doesn't resolve to any valid document ids &mut DeadEndsCache<::Condition>, ) -> Result>; +/// A structure which is kept but not updated during the traversal of the graph. +/// It can however be updated by the `visit` closure once a valid path has been found. struct VisitorContext<'a, G: RankingRuleGraphTrait> { graph: &'a mut RankingRuleGraph, all_costs_from_node: &'a MappedInterner>, dead_ends_cache: &'a mut DeadEndsCache, } +/// The internal state of the traversal algorithm struct VisitorState { + /// Budget from the current node to the end node remaining_cost: u64, - + /// Previously visited conditions, in order. path: Vec>, - + /// Previously visited conditions, as an efficient and compact set. visited_conditions: SmallBitmap, + /// Previously visited (ie not skipped) nodes, as an efficient and compact set. 
visited_nodes: SmallBitmap, - + /// The conditions that cannot be visited anymore forbidden_conditions: SmallBitmap, - forbidden_conditions_to_nodes: SmallBitmap, + /// The nodes that cannot be visited anymore (they must be skipped) + nodes_to_skip: SmallBitmap, } +/// See module documentation pub struct PathVisitor<'a, G: RankingRuleGraphTrait> { state: VisitorState, ctx: VisitorContext<'a, G>, @@ -56,14 +110,13 @@ impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> { forbidden_conditions: SmallBitmap::for_interned_values_in( &graph.conditions_interner, ), - forbidden_conditions_to_nodes: SmallBitmap::for_interned_values_in( - &graph.query_graph.nodes, - ), + nodes_to_skip: SmallBitmap::for_interned_values_in(&graph.query_graph.nodes), }, ctx: VisitorContext { graph, all_costs_from_node, dead_ends_cache }, } } + /// See module documentation pub fn visit_paths(mut self, visit: VisitFn) -> Result<()> { let _ = self.state.visit_node(self.ctx.graph.query_graph.root_node, visit, &mut self.ctx)?; @@ -72,22 +125,31 @@ impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> { } impl VisitorState { + /// Visits a node: traverse all its valid conditional and unconditional edges. + /// + /// Returns ControlFlow::Break if the path finding algorithm should stop. + /// Returns whether a valid path was found from this node otherwise. fn visit_node( &mut self, from_node: Interned, visit: VisitFn, ctx: &mut VisitorContext, ) -> Result> { + // any valid path will be found from this point + // if a valid path was found, then we know that the DeadEndsCache may have been updated, + // and we will need to do more work to potentially backtrack let mut any_valid = false; let edges = ctx.graph.edges_of_node.get(from_node).clone(); for edge_idx in edges.iter() { + // could be none if the edge was deleted let Some(edge) = ctx.graph.edges_store.get(edge_idx).clone() else { continue }; if self.remaining_cost < edge.cost as u64 { continue; } self.remaining_cost -= edge.cost as u64; + let cf = match edge.condition { Some(condition) => self.visit_condition( condition, @@ -119,6 +181,10 @@ impl VisitorState { Ok(ControlFlow::Continue(any_valid)) } + /// Visits an unconditional edge. + /// + /// Returns ControlFlow::Break if the path finding algorithm should stop. + /// Returns whether a valid path was found from this node otherwise. fn visit_no_condition( &mut self, dest_node: Interned, @@ -134,20 +200,29 @@ impl VisitorState { { return Ok(ControlFlow::Continue(false)); } + // We've reached the END node! if dest_node == ctx.graph.query_graph.end_node { let control_flow = visit(&self.path, ctx.graph, ctx.dead_ends_cache)?; + // We could change the return type of the visit closure such that the caller + // tells us whether the dead ends cache was updated or not. + // Alternatively, maybe the DeadEndsCache should have a generation number + // to it, so that we don't need to play with these booleans at all. match control_flow { ControlFlow::Continue(_) => Ok(ControlFlow::Continue(true)), ControlFlow::Break(_) => Ok(ControlFlow::Break(())), } } else { - let old_fbct = self.forbidden_conditions_to_nodes.clone(); - self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip); + let old_fbct = self.nodes_to_skip.clone(); + self.nodes_to_skip.union(edge_new_nodes_to_skip); let cf = self.visit_node(dest_node, visit, ctx)?; - self.forbidden_conditions_to_nodes = old_fbct; + self.nodes_to_skip = old_fbct; Ok(cf) } } + /// Visits a conditional edge. + /// + /// Returns ControlFlow::Break if the path finding algorithm should stop. 
+ /// Returns whether a valid path was found from this node otherwise. fn visit_condition( &mut self, condition: Interned, @@ -159,7 +234,7 @@ impl VisitorState { assert!(dest_node != ctx.graph.query_graph.end_node); if self.forbidden_conditions.contains(condition) - || self.forbidden_conditions_to_nodes.contains(dest_node) + || self.nodes_to_skip.contains(dest_node) || edge_new_nodes_to_skip.intersects(&self.visited_nodes) { return Ok(ControlFlow::Continue(false)); @@ -180,19 +255,19 @@ impl VisitorState { self.visited_nodes.insert(dest_node); self.visited_conditions.insert(condition); - let old_fc = self.forbidden_conditions.clone(); + let old_forb_cond = self.forbidden_conditions.clone(); if let Some(next_forbidden) = ctx.dead_ends_cache.forbidden_conditions_after_prefix(self.path.iter().copied()) { self.forbidden_conditions.union(&next_forbidden); } - let old_fctn = self.forbidden_conditions_to_nodes.clone(); - self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip); + let old_nodes_to_skip = self.nodes_to_skip.clone(); + self.nodes_to_skip.union(edge_new_nodes_to_skip); let cf = self.visit_node(dest_node, visit, ctx)?; - self.forbidden_conditions_to_nodes = old_fctn; - self.forbidden_conditions = old_fc; + self.nodes_to_skip = old_nodes_to_skip; + self.forbidden_conditions = old_forb_cond; self.visited_conditions.remove(condition); self.visited_nodes.remove(dest_node); @@ -205,18 +280,12 @@ impl VisitorState { impl RankingRuleGraph { pub fn find_all_costs_to_end(&self) -> MappedInterner> { let mut costs_to_end = self.query_graph.nodes.map(|_| vec![]); - let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len()); - let mut node_stack = VecDeque::new(); - - *costs_to_end.get_mut(self.query_graph.end_node) = vec![0]; - - for prev_node in self.query_graph.nodes.get(self.query_graph.end_node).predecessors.iter() { - node_stack.push_back(prev_node); - enqueued.insert(prev_node); - } - - while let Some(cur_node) = node_stack.pop_front() { + self.traverse_breadth_first_backward(self.query_graph.end_node, |cur_node| { + if cur_node == self.query_graph.end_node { + *costs_to_end.get_mut(self.query_graph.end_node) = vec![0]; + return; + } let mut self_costs = Vec::::new(); let cur_node_edges = &self.edges_of_node.get(cur_node); @@ -232,13 +301,7 @@ impl RankingRuleGraph { self_costs.dedup(); *costs_to_end.get_mut(cur_node) = self_costs; - for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() { - if !enqueued.contains(prev_node) { - node_stack.push_back(prev_node); - enqueued.insert(prev_node); - } - } - } + }); costs_to_end } @@ -247,17 +310,12 @@ impl RankingRuleGraph { node_with_removed_outgoing_conditions: Interned, costs: &mut MappedInterner>, ) { - let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len()); - let mut node_stack = VecDeque::new(); - - enqueued.insert(node_with_removed_outgoing_conditions); - node_stack.push_back(node_with_removed_outgoing_conditions); - - 'main_loop: while let Some(cur_node) = node_stack.pop_front() { + // Traverse the graph backward from the target node, recomputing the cost for each of its predecessors. + // We first check that no other node is contributing the same total cost to a predecessor before removing + // the cost from the predecessor. 
+ self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| { let mut costs_to_remove = FxHashSet::default(); - for c in costs.get(cur_node) { - costs_to_remove.insert(*c); - } + costs_to_remove.extend(costs.get(cur_node).iter().copied()); let cur_node_edges = &self.edges_of_node.get(cur_node); for edge_idx in cur_node_edges.iter() { @@ -265,22 +323,75 @@ impl RankingRuleGraph { for cost in costs.get(edge.dest_node).iter() { costs_to_remove.remove(&(*cost + edge.cost as u64)); if costs_to_remove.is_empty() { - continue 'main_loop; + return; } } } if costs_to_remove.is_empty() { - continue 'main_loop; + return; } let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied()); for c in costs_to_remove { new_costs.remove(&c); } *costs.get_mut(cur_node) = new_costs.into_iter().collect(); + }); + } + /// Traverse the graph backwards from the given node such that every time + /// a node is visited, we are guaranteed that all its successors either: + /// 1. have already been visited; OR + /// 2. were not reachable from the given node + pub fn traverse_breadth_first_backward( + &self, + from: Interned, + mut visit: impl FnMut(Interned), + ) { + let mut reachable = SmallBitmap::for_interned_values_in(&self.query_graph.nodes); + { + // go backward to get the set of all reachable nodes from the given node + // the nodes that are not reachable will be set as `visited` + let mut stack = VecDeque::new(); + let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes); + enqueued.insert(from); + stack.push_back(from); + while let Some(n) = stack.pop_front() { + if reachable.contains(n) { + continue; + } + reachable.insert(n); + for prev_node in self.query_graph.nodes.get(n).predecessors.iter() { + if !enqueued.contains(prev_node) && !reachable.contains(prev_node) { + stack.push_back(prev_node); + enqueued.insert(prev_node); + } + } + } + }; + let mut unreachable_or_visited = + SmallBitmap::for_interned_values_in(&self.query_graph.nodes); + for (n, _) in self.query_graph.nodes.iter() { + if !reachable.contains(n) { + unreachable_or_visited.insert(n); + } + } + + let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes); + let mut stack = VecDeque::new(); + + enqueued.insert(from); + stack.push_back(from); + + while let Some(cur_node) = stack.pop_front() { + if !self.query_graph.nodes.get(cur_node).successors.is_subset(&unreachable_or_visited) { + stack.push_back(cur_node); + continue; + } + unreachable_or_visited.insert(cur_node); + visit(cur_node); for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() { - if !enqueued.contains(prev_node) { - node_stack.push_back(prev_node); + if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) { + stack.push_back(prev_node); enqueued.insert(prev_node); } } diff --git a/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs b/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs index d0fcd8bd8..5d199c82a 100644 --- a/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs @@ -9,12 +9,8 @@ use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::SearchContext; use crate::Result; -// TODO: give a generation to each universe, then be able to get the exact -// delta of docids between two universes of different generations! 
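The `traverse_breadth_first_backward` helper introduced above replaces both hand-rolled queue loops with a single visiting order: a node is visited only once all of its still-reachable successors have been. A self-contained sketch of that order on a plain adjacency-list DAG (all names here are invented for illustration, and the graph is assumed acyclic, which holds for query graphs):

use std::collections::VecDeque;

struct Dag {
    succ: Vec<Vec<usize>>, // succ[n]: successors of node n
    pred: Vec<Vec<usize>>, // pred[n]: predecessors of node n
}

fn traverse_backward(dag: &Dag, from: usize, mut visit: impl FnMut(usize)) {
    // First pass: mark every node that can reach `from`.
    let mut reachable = vec![false; dag.succ.len()];
    let mut stack = VecDeque::from([from]);
    while let Some(n) = stack.pop_front() {
        if !reachable[n] {
            reachable[n] = true;
            stack.extend(dag.pred[n].iter().copied());
        }
    }
    // A node counts as "done" when it is unreachable or already visited.
    let mut done: Vec<bool> = reachable.iter().map(|&r| !r).collect();
    // Second pass: walk predecessors breadth-first, requeueing any node that
    // still has a pending successor so that it is visited after them.
    let mut queue = VecDeque::from([from]);
    while let Some(n) = queue.pop_front() {
        if done[n] {
            continue;
        }
        if dag.succ[n].iter().any(|&s| !done[s]) {
            queue.push_back(n); // a reachable successor is pending: retry later
            continue;
        }
        done[n] = true;
        visit(n);
        queue.extend(dag.pred[n].iter().copied().filter(|&p| !done[p]));
    }
}

This is the order that lets `find_all_costs_to_end` build each node's list of possible costs in a single pass from the already-final lists of its successors.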
- /// A cache storing the document ids associated with each ranking rule edge pub struct ConditionDocIdsCache { - // TOOD: should be a mapped interner? pub cache: FxHashMap, ComputedCondition>, _phantom: PhantomData, } @@ -54,7 +50,7 @@ impl ConditionDocIdsCache { } let condition = graph.conditions_interner.get_mut(interned_condition); let computed = G::resolve_condition(ctx, condition, universe)?; - // TODO: if computed.universe_len != universe.len() ? + // Can we put an assert here for computed.universe_len == universe.len() ? let _ = self.cache.insert(interned_condition, computed); let computed = &self.cache[&interned_condition]; Ok(computed) diff --git a/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs b/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs index 4bbf91fcd..bac25da82 100644 --- a/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/dead_ends_cache.rs @@ -2,6 +2,7 @@ use crate::search::new::interner::{FixedSizeInterner, Interned}; use crate::search::new::small_bitmap::SmallBitmap; pub struct DeadEndsCache { + // conditions and next could/should be part of the same vector conditions: Vec>, next: Vec, pub forbidden: SmallBitmap, @@ -27,7 +28,7 @@ impl DeadEndsCache { self.forbidden.insert(condition); } - pub fn advance(&mut self, condition: Interned) -> Option<&mut Self> { + fn advance(&mut self, condition: Interned) -> Option<&mut Self> { if let Some(idx) = self.conditions.iter().position(|c| *c == condition) { Some(&mut self.next[idx]) } else { diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/milli/src/search/new/ranking_rule_graph/fid/mod.rs index 0f2cceaec..e3ccf23fa 100644 --- a/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -69,14 +69,9 @@ impl RankingRuleGraphTrait for FidGraph { let mut edges = vec![]; for fid in all_fields { - // TODO: We can improve performances and relevancy by storing - // the term subsets associated to each field ids fetched. edges.push(( - fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. 
- conditions_interner.insert(FidCondition { - term: term.clone(), // TODO remove this ugly clone - fid, - }), + fid as u32 * term.term_ids.len() as u32, + conditions_interner.insert(FidCondition { term: term.clone(), fid }), )); } diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index f60c481de..8de455822 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -20,6 +20,8 @@ mod position; mod proximity; /// Implementation of the `typo` ranking rule mod typo; +/// Implementation of the `words` ranking rule +mod words; use std::collections::BTreeSet; use std::hash::Hash; @@ -33,6 +35,7 @@ pub use position::{PositionCondition, PositionGraph}; pub use proximity::{ProximityCondition, ProximityGraph}; use roaring::RoaringBitmap; pub use typo::{TypoCondition, TypoGraph}; +pub use words::{WordsCondition, WordsGraph}; use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner}; use super::query_term::LocatedQueryTermSubset; diff --git a/milli/src/search/new/ranking_rule_graph/position/mod.rs b/milli/src/search/new/ranking_rule_graph/position/mod.rs index d4640097e..c2e3b9012 100644 --- a/milli/src/search/new/ranking_rule_graph/position/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/position/mod.rs @@ -94,14 +94,9 @@ impl RankingRuleGraphTrait for PositionGraph { let mut edges = vec![]; for (cost, positions) in positions_for_costs { - // TODO: We can improve performances and relevancy by storing - // the term subsets associated to each position fetched edges.push(( cost, - conditions_interner.insert(PositionCondition { - term: term.clone(), // TODO remove this ugly clone - positions, - }), + conditions_interner.insert(PositionCondition { term: term.clone(), positions }), )); } @@ -111,23 +106,16 @@ impl RankingRuleGraphTrait for PositionGraph { fn cost_from_position(sum_positions: u32) -> u32 { match sum_positions { - 0 | 1 | 2 | 3 => sum_positions, - 4 | 5 => 4, - 6 | 7 => 5, - 8 | 9 => 6, - 10 | 11 => 7, - 12 | 13 => 8, - 14 | 15 => 9, - 16 | 17..=24 => 10, - 25..=32 => 11, - 33..=64 => 12, - 65..=128 => 13, - 129..=256 => 14, - 257..=512 => 15, - 513..=1024 => 16, - 1025..=2048 => 17, - 2049..=4096 => 18, - 4097..=8192 => 19, - _ => 20, + 0 => 0, + 1 => 1, + 2..=4 => 2, + 5..=7 => 3, + 8..=11 => 4, + 12..=16 => 5, + 17..=24 => 6, + 25..=64 => 7, + 65..=256 => 8, + 257..=1024 => 9, + _ => 10, } } diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs index 057779a22..29a1876b4 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs @@ -65,13 +65,6 @@ pub fn compute_docids( } } - // TODO: add safeguard in case the cartesian product is too large! - // even if we restrict the word derivations to a maximum of 100, the size of the - // caterisan product could reach a maximum of 10_000 derivations, which is way too much. - // Maybe prioritise the product of zero typo derivations, then the product of zero-typo/one-typo - // + one-typo/zero-typo, then one-typo/one-typo, then ... until an arbitrary limit has been - // reached - for (left_phrase, left_word) in last_words_of_term_derivations(ctx, &left_term.term_subset)? 
{ // Before computing the edges, check that the left word and left phrase // aren't disjoint with the universe, but only do it if there is more than @@ -111,8 +104,6 @@ pub fn compute_docids( Ok(ComputedCondition { docids, universe_len: universe.len(), - // TODO: think about whether we want to reduce the subset, - // we probably should! start_term_subset: Some(left_term.clone()), end_term_subset: right_term.clone(), }) @@ -203,12 +194,7 @@ fn compute_non_prefix_edges( *docids |= new_docids; } } - if backward_proximity >= 1 - // TODO: for now, we don't do any swapping when either term is a phrase - // but maybe we should. We'd need to look at the first/last word of the phrase - // depending on the context. - && left_phrase.is_none() && right_phrase.is_none() - { + if backward_proximity >= 1 && left_phrase.is_none() && right_phrase.is_none() { if let Some(new_docids) = ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)? { diff --git a/milli/src/search/new/ranking_rule_graph/typo/mod.rs b/milli/src/search/new/ranking_rule_graph/typo/mod.rs index da5198c23..a44be6015 100644 --- a/milli/src/search/new/ranking_rule_graph/typo/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/typo/mod.rs @@ -50,7 +50,7 @@ impl RankingRuleGraphTrait for TypoGraph { // 3-gram -> equivalent to 2 typos let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 }; - for nbr_typos in 0..=term.term_subset.max_nbr_typos(ctx) { + for nbr_typos in 0..=term.term_subset.max_typo_cost(ctx) { let mut term = term.clone(); match nbr_typos { 0 => { diff --git a/milli/src/search/new/ranking_rule_graph/words/mod.rs b/milli/src/search/new/ranking_rule_graph/words/mod.rs new file mode 100644 index 000000000..0a0cc112b --- /dev/null +++ b/milli/src/search/new/ranking_rule_graph/words/mod.rs @@ -0,0 +1,49 @@ +use roaring::RoaringBitmap; + +use super::{ComputedCondition, RankingRuleGraphTrait}; +use crate::search::new::interner::{DedupInterner, Interned}; +use crate::search::new::query_term::LocatedQueryTermSubset; +use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; +use crate::search::new::SearchContext; +use crate::Result; + +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct WordsCondition { + term: LocatedQueryTermSubset, +} + +pub enum WordsGraph {} + +impl RankingRuleGraphTrait for WordsGraph { + type Condition = WordsCondition; + + fn resolve_condition( + ctx: &mut SearchContext, + condition: &Self::Condition, + universe: &RoaringBitmap, + ) -> Result { + let WordsCondition { term, .. 
} = condition; + // maybe compute_query_term_subset_docids should accept a universe as argument + let mut docids = compute_query_term_subset_docids(ctx, &term.term_subset)?; + docids &= universe; + + Ok(ComputedCondition { + docids, + universe_len: universe.len(), + start_term_subset: None, + end_term_subset: term.clone(), + }) + } + + fn build_edges( + _ctx: &mut SearchContext, + conditions_interner: &mut DedupInterner, + _from: Option<&LocatedQueryTermSubset>, + to_term: &LocatedQueryTermSubset, + ) -> Result)>> { + Ok(vec![( + to_term.term_ids.len() as u32, + conditions_interner.insert(WordsCondition { term: to_term.clone() }), + )]) + } +} diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs index 797db5875..d992cd22f 100644 --- a/milli/src/search/new/resolve_query_graph.rs +++ b/milli/src/search/new/resolve_query_graph.rs @@ -33,8 +33,6 @@ pub fn compute_query_term_subset_docids( ctx: &mut SearchContext, term: &QueryTermSubset, ) -> Result { - // TODO Use the roaring::MultiOps trait - let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_docids) = ctx.word_docids(word)? { @@ -59,8 +57,6 @@ pub fn compute_query_term_subset_docids_within_field_id( term: &QueryTermSubset, fid: u16, ) -> Result { - // TODO Use the roaring::MultiOps trait - let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(word.interned(), fid)? { @@ -71,7 +67,6 @@ pub fn compute_query_term_subset_docids_within_field_id( for phrase in term.all_phrases(ctx)? { // There may be false positives when resolving a phrase, so we're not // guaranteed that all of its words are within a single fid. - // TODO: fix this? if let Some(word) = phrase.words(ctx).iter().flatten().next() { if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(*word, fid)? { docids |= ctx.get_phrase_docids(phrase)? & word_fid_docids; @@ -95,7 +90,6 @@ pub fn compute_query_term_subset_docids_within_position( term: &QueryTermSubset, position: u16, ) -> Result { - // TODO Use the roaring::MultiOps trait let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_position_docids) = @@ -108,7 +102,6 @@ pub fn compute_query_term_subset_docids_within_position( for phrase in term.all_phrases(ctx)? { // It's difficult to know the expected position of the words in the phrase, // so instead we just check the first one. - // TODO: fix this? if let Some(word) = phrase.words(ctx).iter().flatten().next() { if let Some(word_position_docids) = ctx.get_db_word_position_docids(*word, position)? { docids |= ctx.get_phrase_docids(phrase)? 
& word_position_docids @@ -132,9 +125,6 @@ pub fn compute_query_graph_docids( q: &QueryGraph, universe: &RoaringBitmap, ) -> Result { - // TODO: there must be a faster way to compute this big - // roaring bitmap expression - let mut nodes_resolved = SmallBitmap::for_interned_values_in(&q.nodes); let mut path_nodes_docids = q.nodes.map(|_| RoaringBitmap::new()); diff --git a/milli/src/search/new/sort.rs b/milli/src/search/new/sort.rs index 53144d00d..3f57b2aa5 100644 --- a/milli/src/search/new/sort.rs +++ b/milli/src/search/new/sort.rs @@ -141,10 +141,6 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx, universe: &RoaringBitmap, ) -> Result>> { let iter = self.iter.as_mut().unwrap(); - // TODO: we should make use of the universe in the function below - // good for correctness, but ideally iter.next_bucket would take the current universe into account, - // as right now it could return buckets that don't intersect with the universe, meaning we will make many - // unneeded calls. if let Some(mut bucket) = iter.next_bucket()? { bucket.candidates &= universe; Ok(Some(bucket)) diff --git a/milli/src/search/new/tests/attribute_position.rs b/milli/src/search/new/tests/attribute_position.rs index 5e16cd023..37f303b10 100644 --- a/milli/src/search/new/tests/attribute_position.rs +++ b/milli/src/search/new/tests/attribute_position.rs @@ -138,7 +138,7 @@ fn test_attribute_position_simple() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("quick brown"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]"); } #[test] fn test_attribute_position_repeated() { @@ -163,7 +163,7 @@ fn test_attribute_position_different_fields() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("quick brown"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]"); } #[test] @@ -176,5 +176,5 @@ fn test_attribute_position_ngrams() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("quick brown"); let SearchResult { documents_ids, .. } = s.execute().unwrap(); - insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]"); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]"); } diff --git a/milli/src/search/new/tests/distinct.rs b/milli/src/search/new/tests/distinct.rs index 2c147d514..c54600f27 100644 --- a/milli/src/search/new/tests/distinct.rs +++ b/milli/src/search/new/tests/distinct.rs @@ -527,7 +527,7 @@ fn test_distinct_all_candidates() { let SearchResult { documents_ids, candidates, .. } = s.execute().unwrap(); let candidates = candidates.iter().collect::>(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); - // TODO: this is incorrect! + // This is incorrect, but unfortunately impossible to do better efficiently. 
insta::assert_snapshot!(format!("{candidates:?}"), @"[1, 4, 7, 8, 14, 17, 19, 20, 23, 24, 25, 26]"); } diff --git a/milli/src/search/new/tests/proximity.rs b/milli/src/search/new/tests/proximity.rs index 401508866..6e4181a95 100644 --- a/milli/src/search/new/tests/proximity.rs +++ b/milli/src/search/new/tests/proximity.rs @@ -122,11 +122,11 @@ fn create_edge_cases_index() -> TempIndex { sta stb stc ste stf stg sth sti stj stk stl stm stn sto stp stq str stst stt stu stv stw stx sty stz " }, - // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. - // If the search query is "sunflower", the split word "Sun Flower" will match some documents. + // The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`. + // If the search query is "sunflower", the split word "Sun Flower" will match some documents. // If the query is `sunflower wilting`, then we should make sure that - // the sprximity condition `flower wilting: sprx N` also comes with the condition - // `sun wilting: sprx N+1`. TODO: this is not the exact condition we use for now. + // the proximity condition `flower wilting: sprx N` also comes with the condition + // `sun wilting: sprx N+1`, but this is not the exact condition we use for now. // We only check that the phrase `sun flower` exists and `flower wilting: sprx N`, which // is better than nothing but not the best. { @@ -139,7 +139,7 @@ fn create_edge_cases_index() -> TempIndex { }, { "id": 3, - // This document matches the query `sunflower wilting`, but the sprximity condition + // This document matches the query `sunflower wilting`, but the sprximity condition // between `sunflower` and `wilting` cannot be through the split-word `Sun Flower` // which would reduce to only `flower` and `wilting` being in sprximity. "text": "A flower wilting under the sun, unlike a sunflower" @@ -299,7 +299,7 @@ fn test_proximity_split_word() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: "2" and "4" should be swapped ideally + // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" [ "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", @@ -316,7 +316,7 @@ fn test_proximity_split_word() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: "2" and "4" should be swapped ideally + // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" [ "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", @@ -341,7 +341,7 @@ fn test_proximity_split_word() { let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: "2" and "4" should be swapped ideally + // "2" and "4" should be swapped ideally insta::assert_debug_snapshot!(texts, @r###" [ "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"", diff --git a/milli/src/search/new/tests/proximity_typo.rs b/milli/src/search/new/tests/proximity_typo.rs index ab98f99c0..b459b178b 100644 --- a/milli/src/search/new/tests/proximity_typo.rs +++ b/milli/src/search/new/tests/proximity_typo.rs @@ -2,9 +2,8 @@ This module tests the interactions between the proximity and typo ranking rules. The proximity ranking rule should transform the query graph such that it -only contains the word pairs that it used to compute its bucket. - -TODO: This is not currently implemented. +only contains the word pairs that it used to compute its bucket, but this is not currently +implemented. */ use crate::index::tests::TempIndex; @@ -64,7 +63,7 @@ fn test_trap_basic() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); - // TODO: this is incorrect, 1 should come before 0 + // This is incorrect, 1 should come before 0 insta::assert_debug_snapshot!(texts, @r###" [ "\"summer. holiday. sommer holidty\"", diff --git a/milli/src/search/new/tests/typo.rs b/milli/src/search/new/tests/typo.rs index 8fd9de5fc..536f6653d 100644 --- a/milli/src/search/new/tests/typo.rs +++ b/milli/src/search/new/tests/typo.rs @@ -571,8 +571,8 @@ fn test_typo_synonyms() { s.terms_matching_strategy(TermsMatchingStrategy::All); s.query("the fast brownish fox jumps over the lackadaisical dog"); - // TODO: is this correct? interaction of ngrams + synonyms means that the - // multi-word synonyms end up having a typo cost. This is probably not what we want. + // The interaction of ngrams + synonyms means that the multi-word synonyms end up having a typo cost. + // This is probably not what we want. let SearchResult { documents_ids, .. 
} = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0, 22]"); let texts = collect_field_values(&index, &txn, "text", &documents_ids); diff --git a/milli/src/search/new/words.rs b/milli/src/search/new/words.rs deleted file mode 100644 index 72b7b5916..000000000 --- a/milli/src/search/new/words.rs +++ /dev/null @@ -1,87 +0,0 @@ -use roaring::RoaringBitmap; - -use super::logger::SearchLogger; -use super::query_graph::QueryNode; -use super::resolve_query_graph::compute_query_graph_docids; -use super::small_bitmap::SmallBitmap; -use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; -use crate::{Result, TermsMatchingStrategy}; - -pub struct Words { - exhausted: bool, // TODO: remove - query_graph: Option, - nodes_to_remove: Vec>, - terms_matching_strategy: TermsMatchingStrategy, -} -impl Words { - pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self { - Self { - exhausted: true, - query_graph: None, - nodes_to_remove: vec![], - terms_matching_strategy, - } - } -} - -impl<'ctx> RankingRule<'ctx, QueryGraph> for Words { - fn id(&self) -> String { - "words".to_owned() - } - fn start_iteration( - &mut self, - ctx: &mut SearchContext<'ctx>, - _logger: &mut dyn SearchLogger, - _universe: &RoaringBitmap, - parent_query_graph: &QueryGraph, - ) -> Result<()> { - self.exhausted = false; - self.query_graph = Some(parent_query_graph.clone()); - self.nodes_to_remove = match self.terms_matching_strategy { - TermsMatchingStrategy::Last => { - let mut ns = parent_query_graph.removal_order_for_terms_matching_strategy_last(ctx); - ns.reverse(); - ns - } - TermsMatchingStrategy::All => { - vec![] - } - }; - Ok(()) - } - - fn next_bucket( - &mut self, - ctx: &mut SearchContext<'ctx>, - logger: &mut dyn SearchLogger, - universe: &RoaringBitmap, - ) -> Result>> { - if self.exhausted { - return Ok(None); - } - let Some(query_graph) = &mut self.query_graph else { panic!() }; - logger.log_internal_state(query_graph); - - let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?; - - let child_query_graph = query_graph.clone(); - - if self.nodes_to_remove.is_empty() { - self.exhausted = true; - } else { - let nodes_to_remove = self.nodes_to_remove.pop().unwrap(); - query_graph.remove_nodes_keep_edges(&nodes_to_remove.iter().collect::>()); - } - Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket })) - } - - fn end_iteration( - &mut self, - _ctx: &mut SearchContext<'ctx>, - _logger: &mut dyn SearchLogger, - ) { - self.exhausted = true; - self.nodes_to_remove = vec![]; - self.query_graph = None; - } -} diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index b70bea496..158f515b8 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -89,7 +89,6 @@ Create a snapshot test of the given database. 
- `exact_word_docids` - `word_prefix_docids` - `exact_word_prefix_docids` - - `docid_word_positions` - `word_pair_proximity_docids` - `word_prefix_pair_proximity_docids` - `word_position_docids` @@ -217,11 +216,6 @@ pub fn snap_exact_word_prefix_docids(index: &Index) -> String { &format!("{s:<16} {}", display_bitmap(&b)) }) } -pub fn snap_docid_word_positions(index: &Index) -> String { - make_db_snap_from_iter!(index, docid_word_positions, |((idx, s), b)| { - &format!("{idx:<6} {s:<16} {}", display_bitmap(&b)) - }) -} pub fn snap_word_pair_proximity_docids(index: &Index) -> String { make_db_snap_from_iter!(index, word_pair_proximity_docids, |((proximity, word1, word2), b)| { &format!("{proximity:<2} {word1:<16} {word2:<16} {}", display_bitmap(&b)) @@ -324,7 +318,7 @@ pub fn snap_field_distributions(index: &Index) -> String { let rtxn = index.read_txn().unwrap(); let mut snap = String::new(); for (field, count) in index.field_distribution(&rtxn).unwrap() { - writeln!(&mut snap, "{field:<16} {count:<6}").unwrap(); + writeln!(&mut snap, "{field:<16} {count:<6} |").unwrap(); } snap } @@ -334,7 +328,7 @@ pub fn snap_fields_ids_map(index: &Index) -> String { let mut snap = String::new(); for field_id in fields_ids_map.ids() { let name = fields_ids_map.name(field_id).unwrap(); - writeln!(&mut snap, "{field_id:<3} {name:<16}").unwrap(); + writeln!(&mut snap, "{field_id:<3} {name:<16} |").unwrap(); } snap } @@ -477,9 +471,6 @@ macro_rules! full_snap_of_db { ($index:ident, exact_word_prefix_docids) => {{ $crate::snapshot_tests::snap_exact_word_prefix_docids(&$index) }}; - ($index:ident, docid_word_positions) => {{ - $crate::snapshot_tests::snap_docid_word_positions(&$index) - }}; ($index:ident, word_pair_proximity_docids) => {{ $crate::snapshot_tests::snap_word_pair_proximity_docids(&$index) }}; diff --git a/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap b/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap index 9b074fb59..1d1d629e6 100644 --- a/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap +++ b/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap @@ -1,7 +1,7 @@ --- source: milli/src/index.rs --- -age 1 -id 2 -name 2 +age 1 | +id 2 | +name 2 | diff --git a/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap b/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap index 9b074fb59..1d1d629e6 100644 --- a/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap +++ b/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap @@ -1,7 +1,7 @@ --- source: milli/src/index.rs --- -age 1 -id 2 -name 2 +age 1 | +id 2 | +name 2 | diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 147643bad..04119c641 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -23,7 +23,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { exact_word_docids, word_prefix_docids, exact_word_prefix_docids, - docid_word_positions, word_pair_proximity_docids, word_prefix_pair_proximity_docids, prefix_word_pair_proximity_docids, @@ -80,7 +79,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> { exact_word_docids.clear(self.wtxn)?; word_prefix_docids.clear(self.wtxn)?; exact_word_prefix_docids.clear(self.wtxn)?; - docid_word_positions.clear(self.wtxn)?; word_pair_proximity_docids.clear(self.wtxn)?; 
word_prefix_pair_proximity_docids.clear(self.wtxn)?; prefix_word_pair_proximity_docids.clear(self.wtxn)?; @@ -141,7 +139,6 @@ mod tests { assert!(index.word_docids.is_empty(&rtxn).unwrap()); assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap()); - assert!(index.docid_word_positions.is_empty(&rtxn).unwrap()); assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap()); assert!(index.field_id_word_count_docids.is_empty(&rtxn).unwrap()); assert!(index.word_prefix_pair_proximity_docids.is_empty(&rtxn).unwrap()); diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 311f93f8f..b971768a3 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -1,5 +1,5 @@ use std::collections::btree_map::Entry; -use std::collections::{HashMap, HashSet}; +use std::collections::{BTreeSet, HashMap, HashSet}; use fst::IntoStreamer; use heed::types::{ByteSlice, DecodeIgnore, Str, UnalignedSlice}; @@ -15,8 +15,7 @@ use crate::facet::FacetType; use crate::heed_codec::facet::FieldDocIdFacetCodec; use crate::heed_codec::CboRoaringBitmapCodec; use crate::{ - ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, - SmallString32, BEU32, + ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec, BEU32, }; pub struct DeleteDocuments<'t, 'u, 'i> { @@ -72,7 +71,6 @@ impl std::fmt::Display for DeletionStrategy { pub(crate) struct DetailedDocumentDeletionResult { pub deleted_documents: u64, pub remaining_documents: u64, - pub soft_deletion_used: bool, } impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { @@ -109,11 +107,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { Some(docid) } pub fn execute(self) -> Result { - let DetailedDocumentDeletionResult { - deleted_documents, - remaining_documents, - soft_deletion_used: _, - } = self.execute_inner()?; + let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } = + self.execute_inner()?; Ok(DocumentDeletionResult { deleted_documents, remaining_documents }) } @@ -134,7 +129,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(DetailedDocumentDeletionResult { deleted_documents: 0, remaining_documents: 0, - soft_deletion_used: false, }); } @@ -150,7 +144,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(DetailedDocumentDeletionResult { deleted_documents: current_documents_ids_len, remaining_documents, - soft_deletion_used: false, }); } @@ -219,7 +212,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), - soft_deletion_used: true, }); } @@ -232,7 +224,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { exact_word_docids, word_prefix_docids, exact_word_prefix_docids, - docid_word_positions, word_pair_proximity_docids, field_id_word_count_docids, word_prefix_pair_proximity_docids, @@ -251,23 +242,9 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { facet_id_is_empty_docids, documents, } = self.index; - - // Retrieve the words contained in the documents. - let mut words = Vec::new(); + // Remove from the documents database for docid in &self.to_delete_docids { documents.delete(self.wtxn, &BEU32::new(docid))?; - - // We iterate through the words positions of the document id, retrieve the word and delete the positions. - // We create an iterator to be able to get the content and delete the key-value itself. 
- // It's faster to acquire a cursor to get and delete, as we avoid traversing the LMDB B-Tree two times but only once. - let mut iter = docid_word_positions.prefix_iter_mut(self.wtxn, &(docid, ""))?; - while let Some(result) = iter.next() { - let ((_docid, word), _positions) = result?; - // This boolean will indicate if we must remove this word from the words FST. - words.push((SmallString32::from(word), false)); - // safety: we don't keep references from inside the LMDB database. - unsafe { iter.del_current()? }; - } } // We acquire the current external documents ids map... // Note that its soft-deleted document ids field will be equal to the `to_delete_docids` @@ -278,42 +255,27 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let new_external_documents_ids = new_external_documents_ids.into_static(); self.index.put_external_documents_ids(self.wtxn, &new_external_documents_ids)?; - // Maybe we can improve the get performance of the words - // if we sort the words first, keeping the LMDB pages in cache. - words.sort_unstable(); - + let mut words_to_keep = BTreeSet::default(); + let mut words_to_delete = BTreeSet::default(); // We iterate over the words and delete the documents ids // from the word docids database. - for (word, must_remove) in &mut words { - remove_from_word_docids( - self.wtxn, - word_docids, - word.as_str(), - must_remove, - &self.to_delete_docids, - )?; - - remove_from_word_docids( - self.wtxn, - exact_word_docids, - word.as_str(), - must_remove, - &self.to_delete_docids, - )?; - } + remove_from_word_docids( + self.wtxn, + word_docids, + &self.to_delete_docids, + &mut words_to_keep, + &mut words_to_delete, + )?; + remove_from_word_docids( + self.wtxn, + exact_word_docids, + &self.to_delete_docids, + &mut words_to_keep, + &mut words_to_delete, + )?; // We construct an FST set that contains the words to delete from the words FST. - let words_to_delete = - words.iter().filter_map( - |(word, must_remove)| { - if *must_remove { - Some(word.as_str()) - } else { - None - } - }, - ); - let words_to_delete = fst::Set::from_iter(words_to_delete)?; + let words_to_delete = fst::Set::from_iter(words_to_delete.difference(&words_to_keep))?; let new_words_fst = { // We retrieve the current words FST from the database. @@ -472,7 +434,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), - soft_deletion_used: false, }) } @@ -532,23 +493,24 @@ fn remove_from_word_prefix_docids( fn remove_from_word_docids( txn: &mut heed::RwTxn, db: &heed::Database, - word: &str, - must_remove: &mut bool, to_remove: &RoaringBitmap, + words_to_keep: &mut BTreeSet, + words_to_remove: &mut BTreeSet, ) -> Result<()> { // We create an iterator to be able to get the content and delete the word docids. // It's faster to acquire a cursor to get and delete or put, as we avoid traversing // the LMDB B-Tree two times but only once. - let mut iter = db.prefix_iter_mut(txn, word)?; - if let Some((key, mut docids)) = iter.next().transpose()? { - if key == word { - let previous_len = docids.len(); - docids -= to_remove; - if docids.is_empty() { - // safety: we don't keep references from inside the LMDB database. - unsafe { iter.del_current()? }; - *must_remove = true; - } else if docids.len() != previous_len { + let mut iter = db.iter_mut(txn)?; + while let Some((key, mut docids)) = iter.next().transpose()? 
+        let previous_len = docids.len();
+        docids -= to_remove;
+        if docids.is_empty() {
+            // safety: we don't keep references from inside the LMDB database.
+            unsafe { iter.del_current()? };
+            words_to_remove.insert(key.to_owned());
+        } else {
+            words_to_keep.insert(key.to_owned());
+            if docids.len() != previous_len {
                 let key = key.to_owned();
                 // safety: we don't keep references from inside the LMDB database.
                 unsafe { iter.put_current(&key, &docids)? };
@@ -627,7 +589,7 @@ mod tests {
 
     use super::*;
     use crate::index::tests::TempIndex;
-    use crate::{db_snap, Filter};
+    use crate::{db_snap, Filter, Search};
 
     fn delete_documents<'t>(
         wtxn: &mut RwTxn<'t, '_>,
@@ -1199,4 +1161,52 @@ mod tests {
             DeletionStrategy::AlwaysSoft,
         );
     }
+
+    #[test]
+    fn delete_words_exact_attributes() {
+        let index = TempIndex::new();
+
+        index
+            .update_settings(|settings| {
+                settings.set_primary_key(S("id"));
+                settings.set_searchable_fields(vec![S("text"), S("exact")]);
+                settings.set_exact_attributes(vec![S("exact")].into_iter().collect());
+            })
+            .unwrap();
+
+        index
+            .add_documents(documents!([
+                { "id": 0, "text": "hello" },
+                { "id": 1, "exact": "hello"}
+            ]))
+            .unwrap();
+        db_snap!(index, word_docids, 1, @r###"
+        hello [0, ]
+        "###);
+        db_snap!(index, exact_word_docids, 1, @r###"
+        hello [1, ]
+        "###);
+        db_snap!(index, words_fst, 1, @"300000000000000001084cfcfc2ce1000000016000000090ea47f");
+
+        let mut wtxn = index.write_txn().unwrap();
+        let deleted_internal_ids =
+            delete_documents(&mut wtxn, &index, &["1"], DeletionStrategy::AlwaysHard);
+        wtxn.commit().unwrap();
+
+        db_snap!(index, word_docids, 2, @r###"
+        hello [0, ]
+        "###);
+        db_snap!(index, exact_word_docids, 2, @"");
+        db_snap!(index, words_fst, 2, @"300000000000000001084cfcfc2ce1000000016000000090ea47f");
+
+        insta::assert_snapshot!(format!("{deleted_internal_ids:?}"), @"[1]");
+        let txn = index.read_txn().unwrap();
+        let words = index.words_fst(&txn).unwrap().into_stream().into_strs().unwrap();
+        insta::assert_snapshot!(format!("{words:?}"), @r###"["hello"]"###);
+
+        let mut s = Search::new(&txn, &index);
+        s.query("hello");
+        let crate::SearchResult { documents_ids, .. } = s.execute().unwrap();
+        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
+    }
 }
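The delete path above no longer tracks a per-word `must_remove` flag: a single pass over each word database collects every word that still has documents (`words_to_keep`) and every word whose posting list became empty (`words_to_delete`). A word may survive in `word_docids` while vanishing from `exact_word_docids` (or the reverse), so it must only leave the words FST when no database kept it, hence the `difference` before building the set. A minimal standalone sketch of that set logic; the helper name and `main` are illustrative, not part of the patch:

```rust
use std::collections::BTreeSet;

// Only words gone from *every* word database may be removed from the FST.
fn words_fst_deletions(
    words_to_delete: &BTreeSet<String>,
    words_to_keep: &BTreeSet<String>,
) -> Result<fst::Set<Vec<u8>>, fst::Error> {
    // `difference` yields keys in sorted order, as `fst::Set::from_iter` requires.
    fst::Set::from_iter(words_to_delete.difference(words_to_keep))
}

fn main() -> Result<(), fst::Error> {
    let deleted: BTreeSet<String> = ["hello", "world"].iter().map(|s| s.to_string()).collect();
    let kept: BTreeSet<String> = ["hello"].iter().map(|s| s.to_string()).collect();
    // "hello" was deleted from one database but kept in another: it stays in the FST.
    let set = words_fst_deletions(&deleted, &kept)?;
    assert!(set.contains("world"));
    assert!(!set.contains("hello"));
    Ok(())
}
```

The `delete_words_exact_attributes` test above exercises exactly this case: "hello" is removed from `exact_word_docids` but kept in `word_docids`, so it must remain in the words FST.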
diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs
index 39a3ef437..2fd748d4d 100644
--- a/milli/src/update/facet/mod.rs
+++ b/milli/src/update/facet/mod.rs
@@ -261,7 +261,9 @@ pub(crate) mod test_helpers {
             let options = options.map_size(4096 * 4 * 1000 * 100);
             let tempdir = tempfile::TempDir::new().unwrap();
             let env = options.open(tempdir.path()).unwrap();
-            let content = env.create_database(None).unwrap();
+            let mut wtxn = env.write_txn().unwrap();
+            let content = env.create_database(&mut wtxn, None).unwrap();
+            wtxn.commit().unwrap();
 
             FacetIndex {
                 content,
diff --git a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
index 315ebdf0c..fe8eb93ed 100644
--- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
@@ -1,6 +1,6 @@
 use std::collections::HashMap;
 use std::fs::File;
-use std::{cmp, io};
+use std::io;
 
 use grenad::Sorter;
 
@@ -54,11 +54,10 @@ pub fn extract_fid_word_count_docids(
         }
 
         for position in read_u32_ne_bytes(value) {
-            let (field_id, position) = relative_from_absolute_position(position);
-            let word_count = position as u32 + 1;
-
+            let (field_id, _) = relative_from_absolute_position(position);
             let value = document_fid_wordcount.entry(field_id as FieldId).or_insert(0);
-            *value = cmp::max(*value, word_count);
+            *value += 1;
         }
     }
 
@@ -83,7 +82,7 @@ fn drain_document_fid_wordcount_into_sorter(
     let mut key_buffer = Vec::new();
     for (fid, count) in document_fid_wordcount.drain() {
-        if count <= 10 {
+        if count <= 30 {
             key_buffer.clear();
             key_buffer.extend_from_slice(&fid.to_be_bytes());
             key_buffer.push(count as u8);
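Two behaviour changes hide in this small hunk: the per-field value is now a plain occurrence count rather than `max(relative_position) + 1`, and fields are recorded up to 30 words instead of 10. A self-contained sketch of the new counting; the bit layout of `relative_from_absolute_position` (field id in the high 16 bits of the absolute position) is an assumption mirroring milli's position encoding, not code from this patch:

```rust
use std::collections::HashMap;

type FieldId = u16;

// Assumed layout: the field id occupies the high 16 bits of an absolute position.
fn relative_from_absolute_position(absolute: u32) -> (FieldId, u16) {
    ((absolute >> 16) as FieldId, (absolute & 0xFFFF) as u16)
}

fn main() {
    // One entry per indexed word occurrence of a single document.
    let positions = [0u32, 1, 2, 1 << 16, (1 << 16) | 1];

    let mut document_fid_wordcount: HashMap<FieldId, u32> = HashMap::new();
    for position in positions {
        let (field_id, _) = relative_from_absolute_position(position);
        // New behaviour: count occurrences instead of keeping the highest position.
        *document_fid_wordcount.entry(field_id).or_insert(0) += 1;
    }

    assert_eq!(document_fid_wordcount[&0], 3); // three words in field 0
    assert_eq!(document_fid_wordcount[&1], 2); // two words in field 1
}
```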
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 3df8321bc..632f568ab 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -325,8 +325,6 @@ fn send_and_extract_flattened_documents_data(
                 // send docid_word_positions_chunk to DB writer
                 let docid_word_positions_chunk =
                     unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? };
-                let _ = lmdb_writer_sx
-                    .send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone())));
 
                 let _ = lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair)));
diff --git a/milli/src/update/index_documents/helpers/merge_functions.rs b/milli/src/update/index_documents/helpers/merge_functions.rs
index 7b8891a7a..64bee95df 100644
--- a/milli/src/update/index_documents/helpers/merge_functions.rs
+++ b/milli/src/update/index_documents/helpers/merge_functions.rs
@@ -4,7 +4,6 @@ use std::result::Result as StdResult;
 
 use roaring::RoaringBitmap;
 
-use super::read_u32_ne_bytes;
 use crate::heed_codec::CboRoaringBitmapCodec;
 use crate::update::index_documents::transform::Operation;
 use crate::Result;
@@ -22,10 +21,6 @@ pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
 
-pub fn roaring_bitmap_from_u32s_array(slice: &[u8]) -> RoaringBitmap {
-    read_u32_ne_bytes(slice).collect()
-}
-
 pub fn serialize_roaring_bitmap(bitmap: &RoaringBitmap, buffer: &mut Vec<u8>) -> io::Result<()> {
     buffer.clear();
     buffer.reserve(bitmap.serialized_size());
diff --git a/milli/src/update/index_documents/helpers/mod.rs b/milli/src/update/index_documents/helpers/mod.rs
index ce6a2abe9..95e497af4 100644
--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@@ -14,8 +14,8 @@ pub use grenad_helpers::{
 };
 pub use merge_functions::{
     concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps,
-    merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
-    roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
+    merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs, serialize_roaring_bitmap,
+    MergeFn,
 };
 
 use crate::MAX_WORD_LENGTH;
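`roaring_bitmap_from_u32s_array` can go because its only remaining caller was the `DocidWordPositions` arm deleted from `typed_chunk.rs` further down. For reference, a standalone equivalent of the dropped helper, with `chunks_exact` standing in for milli's `read_u32_ne_bytes`:

```rust
use roaring::RoaringBitmap;

// Collect native-endian u32 document ids out of a raw byte slice.
fn roaring_bitmap_from_u32s_array(slice: &[u8]) -> RoaringBitmap {
    slice
        .chunks_exact(std::mem::size_of::<u32>())
        .map(|bytes| u32::from_ne_bytes(bytes.try_into().unwrap()))
        .collect()
}

fn main() {
    let mut bytes = Vec::new();
    for docid in [1u32, 5, 5, 42] {
        bytes.extend_from_slice(&docid.to_ne_bytes());
    }
    // Duplicates collapse: a bitmap stores each id once.
    assert_eq!(roaring_bitmap_from_u32s_array(&bytes).len(), 3);
}
```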
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index bbfa1d00c..993f87a1f 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -236,7 +236,7 @@ where
             primary_key,
             fields_ids_map,
             field_distribution,
-            mut external_documents_ids,
+            new_external_documents_ids,
             new_documents_ids,
             replaced_documents_ids,
             documents_count,
@@ -363,9 +363,6 @@ where
             deletion_builder.delete_documents(&replaced_documents_ids);
             let deleted_documents_result = deletion_builder.execute_inner()?;
             debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
-            if !deleted_documents_result.soft_deletion_used {
-                external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
-            }
         }
 
         let index_documents_ids = self.index.documents_ids(self.wtxn)?;
@@ -445,6 +442,9 @@
         self.index.put_primary_key(self.wtxn, &primary_key)?;
 
         // We write the external documents ids into the main database.
+        let mut external_documents_ids = self.index.external_documents_ids(self.wtxn)?;
+        external_documents_ids.insert_ids(&new_external_documents_ids)?;
+        let external_documents_ids = external_documents_ids.into_static();
         self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
 
         let all_documents_ids = index_documents_ids | new_documents_ids;
@@ -2471,11 +2471,11 @@ mod tests {
             { "id": 3, "text": "a a a a a a a a a a a a a a a a a
-        a a a a a a a a a a a a a a a a a a a a a a a a a a
-        a a a a a a a a a a a a a a a a a a a a a a a a a a
-        a a a a a a a a a a a a a a a a a a a a a a a a a a
-        a a a a a a a a a a a a a a a a a a a a a a a a a a
-        a a a a a a a a a a a a a a a a a a a a a a a a a a
+        a a a a a a a a a a a a a a a a a a a a a a a a a a
+        a a a a a a a a a a a a a a a a a a a a a a a a a a
+        a a a a a a a a a a a a a a a a a a a a a a a a a a
+        a a a a a a a a a a a a a a a a a a a a a a a a a a
+        a a a a a a a a a a a a a a a a a a a a a a a a a a
         a a a a a a a a a a a a a a a a a a a a a " }
         ]))
@@ -2513,6 +2513,171 @@ mod tests {
         db_snap!(index, word_fid_docids, 3, @"4c2e2a1832e5802796edc1638136d933");
         db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
-        db_snap!(index, docid_word_positions, 3, @"5287245332627675740b28bd46e1cde1");
+    }
+
+    #[test]
+    fn reproduce_the_bug() {
+        /*
+        [milli/examples/fuzz.rs:69] &batches = [
+            Batch(
+                [
+                    AddDoc(
+                        { "id": 1, "doggo": "bernese" }, => internal 0
+                    ),
+                ],
+            ),
+            Batch(
+                [
+                    DeleteDoc(
+                        1, => delete internal 0
+                    ),
+                    AddDoc(
+                        { "id": 0, "catto": "jorts" }, => internal 1
+                    ),
+                ],
+            ),
+            Batch(
+                [
+                    AddDoc(
+                        { "id": 1, "catto": "jorts" }, => internal 2
+                    ),
+                ],
+            ),
+        ]
+        */
+        let mut index = TempIndex::new();
+        index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
+
+        // START OF BATCH
+
+        println!("--- ENTERING BATCH 1");
+
+        let mut wtxn = index.write_txn().unwrap();
+
+        let builder = IndexDocuments::new(
+            &mut wtxn,
+            &index,
+            &index.indexer_config,
+            index.index_documents_config.clone(),
+            |_| (),
+            || false,
+        )
+        .unwrap();
+
+        // OP
+
+        let documents = documents!([
+            { "id": 1, "doggo": "bernese" },
+        ]);
+        let (builder, added) = builder.add_documents(documents).unwrap();
+        insta::assert_display_snapshot!(added.unwrap(), @"1");
+
+        // FINISHING
+        let addition = builder.execute().unwrap();
+        insta::assert_debug_snapshot!(addition, @r###"
+        DocumentAdditionResult {
+            indexed_documents: 1,
+            number_of_documents: 1,
+        }
+        "###);
+        wtxn.commit().unwrap();
+
+        db_snap!(index, documents, @r###"
+        {"id":1,"doggo":"bernese"}
+        "###);
+        db_snap!(index, external_documents_ids, @r###"
+        soft:
+        hard:
+        1 0
+        "###);
+
+        // A first batch of documents has been inserted
+
+        // BATCH 2
+
+        println!("--- ENTERING BATCH 2");
+
+        let mut wtxn = index.write_txn().unwrap();
+
+        let builder = IndexDocuments::new(
+            &mut wtxn,
+            &index,
+            &index.indexer_config,
+            index.index_documents_config.clone(),
+            |_| (),
+            || false,
+        )
+        .unwrap();
+
+        let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap();
+        insta::assert_display_snapshot!(removed.unwrap(), @"1");
+
+        let documents = documents!([
+            { "id": 0, "catto": "jorts" },
+        ]);
+        let (builder, added) = builder.add_documents(documents).unwrap();
+        insta::assert_display_snapshot!(added.unwrap(), @"1");
+
+        let addition = builder.execute().unwrap();
+        insta::assert_debug_snapshot!(addition, @r###"
+        DocumentAdditionResult {
+            indexed_documents: 1,
+            number_of_documents: 1,
+        }
+        "###);
+        wtxn.commit().unwrap();
+
+        db_snap!(index, documents, @r###"
+        {"id":0,"catto":"jorts"}
+        "###);
+
+        db_snap!(index, external_documents_ids, @r###"
+        soft:
+        hard:
+        0 1
+        "###);
+
+        db_snap!(index, soft_deleted_documents_ids, @"[]");
+
+        // BATCH 3
+
+        println!("--- ENTERING BATCH 3");
+
+        let mut wtxn = index.write_txn().unwrap();
+
+        let builder = IndexDocuments::new(
+            &mut wtxn,
+            &index,
+            &index.indexer_config,
+            index.index_documents_config.clone(),
+            |_| (),
+            || false,
+        )
+        .unwrap();
+
+        let documents = documents!([
+            { "id": 1, "catto": "jorts" },
+        ]);
+        let (builder, added) = builder.add_documents(documents).unwrap();
+        insta::assert_display_snapshot!(added.unwrap(), @"1");
+
+        let addition = builder.execute().unwrap();
+        insta::assert_debug_snapshot!(addition, @r###"
+        DocumentAdditionResult {
+            indexed_documents: 1,
+            number_of_documents: 2,
+        }
+        "###);
+        wtxn.commit().unwrap();
+
+        db_snap!(index, documents, @r###"
+        {"id":1,"catto":"jorts"}
+        {"id":0,"catto":"jorts"}
+        "###);
+
+        // Ensure all the returned IDs actually exist.
+        let rtxn = index.read_txn().unwrap();
+        let res = index.search(&rtxn).execute().unwrap();
+        index.documents(&rtxn, res.documents_ids).unwrap();
     }
 }
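The batches in `reproduce_the_bug` show why the merge moved here: when a batch hard-deletes a document and a later batch re-adds its external id, the new mapping must be combined with the state of the index *after* the deletion ran, not with a snapshot taken inside `Transform`. The real work is done by `ExternalDocumentsIds::insert_ids`; below is only a sketch of such a merge using plain `fst` set operations, under the assumption that the freshly indexed batch wins on conflicting external ids:

```rust
use fst::{Map, MapBuilder, Streamer};

// Merge `new` (external id -> internal docid) into `current`; `new` wins on conflicts.
fn merge_external_ids(current: &Map<Vec<u8>>, new: &Map<Vec<u8>>) -> Map<Vec<u8>> {
    let mut builder = MapBuilder::memory();
    // The union streams keys in sorted order; `index` tells which input a value came from.
    let mut stream = current.op().add(new).union();
    while let Some((external_id, values)) = stream.next() {
        let winner = values.iter().find(|v| v.index == 1).unwrap_or(&values[0]);
        builder.insert(external_id, winner.value).unwrap();
    }
    Map::new(builder.into_inner().unwrap()).unwrap()
}

fn main() {
    let current = Map::from_iter([("0", 1u64)]).unwrap();
    let new = Map::from_iter([("1", 2u64)]).unwrap();
    let merged = merge_external_ids(&current, &new);
    assert_eq!(merged.get("0"), Some(1));
    assert_eq!(merged.get("1"), Some(2));
}
```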
diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs
index 6097278a7..ee6831be5 100644
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -21,15 +21,14 @@ use crate::error::{Error, InternalError, UserError};
 use crate::index::{db_name, main_key};
 use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
 use crate::{
-    ExternalDocumentsIds, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index,
-    Result, BEU32,
+    FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
 };
 
 pub struct TransformOutput {
     pub primary_key: String,
     pub fields_ids_map: FieldsIdsMap,
     pub field_distribution: FieldDistribution,
-    pub external_documents_ids: ExternalDocumentsIds<'static>,
+    pub new_external_documents_ids: fst::Map<Cow<'static, [u8]>>,
     pub new_documents_ids: RoaringBitmap,
     pub replaced_documents_ids: RoaringBitmap,
     pub documents_count: usize,
@@ -568,8 +567,6 @@ impl<'a, 'i> Transform<'a, 'i> {
         }))?
         .to_string();
 
-        let mut external_documents_ids = self.index.external_documents_ids(wtxn)?;
-
         // We create a final writer to write the new documents in order from the sorter.
         let mut writer = create_writer(
             self.indexer_settings.chunk_compression_type,
@@ -651,13 +648,12 @@ impl<'a, 'i> Transform<'a, 'i> {
             fst_new_external_documents_ids_builder.insert(key, value)
         })?;
         let new_external_documents_ids = fst_new_external_documents_ids_builder.into_map();
-        external_documents_ids.insert_ids(&new_external_documents_ids)?;
 
         Ok(TransformOutput {
             primary_key,
             fields_ids_map: self.fields_ids_map,
             field_distribution,
-            external_documents_ids: external_documents_ids.into_static(),
+            new_external_documents_ids: new_external_documents_ids.map_data(Cow::Owned).unwrap(),
             new_documents_ids: self.new_documents_ids,
             replaced_documents_ids: self.replaced_documents_ids,
             documents_count: self.documents_count,
@@ -691,7 +687,8 @@ impl<'a, 'i> Transform<'a, 'i> {
         let new_external_documents_ids = {
             let mut external_documents_ids = self.index.external_documents_ids(wtxn)?;
             external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
-            external_documents_ids
+            // This call should be free and can't fail since the previous method merged both fsts.
+            external_documents_ids.into_static().to_fst()?.into_owned()
         };
 
         let documents_ids = self.index.documents_ids(wtxn)?;
@@ -776,7 +773,7 @@ impl<'a, 'i> Transform<'a, 'i> {
             primary_key,
             fields_ids_map: new_fields_ids_map,
             field_distribution,
-            external_documents_ids: new_external_documents_ids.into_static(),
+            new_external_documents_ids,
             new_documents_ids: documents_ids,
             replaced_documents_ids: RoaringBitmap::default(),
             documents_count,
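`TransformOutput` now hands back only the ids minted by the batch, as an owned `fst::Map<Cow<'static, [u8]>>`; `map_data(Cow::Owned)` re-wraps the `Vec<u8>` produced by the FST builder without copying it, while the `Cow` lets other code paths hold borrowed bytes in the same type. A minimal sketch of that conversion (`into_owned_map` is an illustrative name, not part of the patch):

```rust
use std::borrow::Cow;

use fst::Map;

// Re-wrap the builder's Vec<u8> backing storage in a Cow so the same map type
// can also hold borrowed data elsewhere.
fn into_owned_map(map: Map<Vec<u8>>) -> Map<Cow<'static, [u8]>> {
    map.map_data(Cow::Owned).unwrap()
}

fn main() {
    let map = Map::from_iter([("1", 0u64)]).unwrap();
    let owned = into_owned_map(map);
    assert_eq!(owned.get("1"), Some(0));
}
```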
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 53f6d807a..89b10bffe 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -7,24 +7,19 @@ use std::io;
 use charabia::{Language, Script};
 use grenad::MergerBuilder;
 use heed::types::ByteSlice;
-use heed::{BytesDecode, RwTxn};
+use heed::RwTxn;
 use roaring::RoaringBitmap;
 
 use super::helpers::{
-    self, merge_ignore_values, roaring_bitmap_from_u32s_array, serialize_roaring_bitmap,
-    valid_lmdb_key, CursorClonableMmap,
+    self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
 };
 use super::{ClonableMmap, MergeFn};
 use crate::facet::FacetType;
 use crate::update::facet::FacetsUpdate;
 use crate::update::index_documents::helpers::as_cloneable_grenad;
-use crate::{
-    lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
-    Result,
-};
+use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result};
 
 pub(crate) enum TypedChunk {
-    DocidWordPositions(grenad::Reader<CursorClonableMmap>),
     FieldIdDocidFacetStrings(grenad::Reader),
     FieldIdDocidFacetNumbers(grenad::Reader),
     Documents(grenad::Reader),
@@ -56,29 +51,6 @@ pub(crate) fn write_typed_chunk_into_index(
 ) -> Result<(RoaringBitmap, bool)> {
     let mut is_merged_database = false;
     match typed_chunk {
-        TypedChunk::DocidWordPositions(docid_word_positions_iter) => {
-            write_entries_into_database(
-                docid_word_positions_iter,
-                &index.docid_word_positions,
-                wtxn,
-                index_is_empty,
-                |value, buffer| {
-                    // ensure that values are unique and ordered
-                    let positions = roaring_bitmap_from_u32s_array(value);
-                    BoRoaringBitmapCodec::serialize_into(&positions, buffer);
-                    Ok(buffer)
-                },
-                |new_values, db_values, buffer| {
-                    let new_values = roaring_bitmap_from_u32s_array(new_values);
-                    let positions = match BoRoaringBitmapCodec::bytes_decode(db_values) {
-                        Some(db_values) => new_values | db_values,
-                        None => new_values, // should not happen
-                    };
-                    BoRoaringBitmapCodec::serialize_into(&positions, buffer);
-                    Ok(())
-                },
-            )?;
-        }
         TypedChunk::Documents(obkv_documents_iter) => {
            let mut cursor = obkv_documents_iter.into_cursor()?;
            while let Some((key, value)) = cursor.move_on_next()? {