diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 1006a064d..3f6cb9462 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,13 +1,13 @@ contact_links: + - name: Support questions & other + url: https://github.com/meilisearch/meilisearch/discussions/new + about: For any other question, open a discussion in this repository - name: Language support request & feedback url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal?discussions_q=label%3Aproduct%3Acore%3Atokenizer+category%3A%22Feedback+%26+Feature+Proposal%22 about: The requests and feedback regarding Language support are not managed in this repository. Please upvote the related discussion in our dedicated product repository or open a new one if it doesn't exist. - - name: Feature request & feedback + - name: Any other feature request & feedback url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal about: The feature requests and feedback regarding the already existing features are not managed in this repository. Please open a discussion in our dedicated product repository - name: Documentation issue url: https://github.com/meilisearch/documentation/issues/new about: For documentation issues, open an issue or a PR in the documentation repository - - name: Support questions & other - url: https://github.com/meilisearch/meilisearch/discussions/new - about: For any other question, open a discussion in this repository diff --git a/.github/scripts/is-latest-release.sh b/.github/scripts/is-latest-release.sh index 81534a2f7..54f0a9d3a 100644 --- a/.github/scripts/is-latest-release.sh +++ b/.github/scripts/is-latest-release.sh @@ -85,7 +85,7 @@ get_latest() { latest="" current_tag="" for release_info in $releases; do - if [ $i -eq 0 ]; then # Cheking tag_name + if [ $i -eq 0 ]; then # Checking tag_name if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then # If it's not an alpha or beta release current_tag=$release_info else diff --git a/.github/workflows/create-issue-dependencies.yml b/.github/workflows/create-issue-dependencies.yml index 638088c2e..e3deebe2a 100644 --- a/.github/workflows/create-issue-dependencies.yml +++ b/.github/workflows/create-issue-dependencies.yml @@ -3,7 +3,7 @@ on: schedule: - cron: '0 0 1 */3 *' workflow_dispatch: - + jobs: create-issue: runs-on: ubuntu-latest @@ -12,12 +12,12 @@ jobs: - name: Create an issue uses: actions-ecosystem/action-create-issue@v1 with: - github_token: ${{ secrets.GITHUB_TOKEN }} + github_token: ${{ secrets.MEILI_BOT_GH_PAT }} title: Upgrade dependencies body: | We need to update the dependencies of the Meilisearch repository, and, if possible, the dependencies of all the core-team repositories that Meilisearch depends on (milli, charabia, heed...). - ⚠️ This issue should only be done at the beginning of the sprint! + ⚠️ This issue should only be done at the beginning of the sprint! 
labels: | dependencies maintenance diff --git a/.github/workflows/flaky.yml b/.github/workflows/flaky.yml index 8d34da4d9..fadd6bf96 100644 --- a/.github/workflows/flaky.yml +++ b/.github/workflows/flaky.yml @@ -1,7 +1,8 @@ name: Look for flaky tests on: + workflow_dispatch: schedule: - - cron: "0 12 * * FRI" # every friday at 12:00PM + - cron: "0 12 * * FRI" # Every Friday at 12:00PM jobs: flaky: diff --git a/.github/workflows/milestone-workflow.yml b/.github/workflows/milestone-workflow.yml new file mode 100644 index 000000000..b8a2a1662 --- /dev/null +++ b/.github/workflows/milestone-workflow.yml @@ -0,0 +1,156 @@ +name: Milestone's workflow + +# /!\ No git flow are handled here + +# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed) +# - the roadmap issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/roadmap-issue.md +# - the changelog issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/changelog-issue.md + +# For each Milestone closed +# - the `release_version` label is created +# - this label is applied to all issues/PRs in the Milestone + +on: + milestone: + types: [created, closed] + +env: + MILESTONE_VERSION: ${{ github.event.milestone.title }} + MILESTONE_URL: ${{ github.event.milestone.html_url }} + MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }} + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + +jobs: + +# ----------------- +# MILESTONE CREATED +# ----------------- + + get-release-version: + if: github.event.action == 'created' + runs-on: ubuntu-latest + outputs: + is-patch: ${{ steps.check-patch.outputs.is-patch }} + env: + MILESTONE_VERSION: ${{ github.event.milestone.title }} + steps: + - uses: actions/checkout@v3 + - name: Check if this release is a patch release only + id: check-patch + run: | + echo version: $MILESTONE_VERSION + if [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.0$ ]]; then + echo 'This is NOT a patch release' + echo ::set-output name=is-patch::false + elif [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo 'This is a patch release' + echo ::set-output name=is-patch::true + else + echo "Not a valid format of release, check the Milestone's title." + echo 'Should be vX.Y.Z' + exit 1 + fi + + create-roadmap-issue: + needs: get-release-version + # Create the roadmap issue if the release is not only a patch release + if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false' + runs-on: ubuntu-latest + env: + ISSUE_TEMPLATE: issue-template.md + steps: + - uses: actions/checkout@v3 + - name: Download the issue template + run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE + - name: Replace all empty occurrences in the templates + run: | + # Replace all <> occurrences + sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE + + # Replace all <> occurrences + milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7) + sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE + + # Replace release date if exists + if [[ ! 
-z $MILESTONE_DUE_ON ]]; then + date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1) + sed -i "s/Release date\: 20XX-XX-XX/Release date\: $date/g" $ISSUE_TEMPLATE + fi + - name: Create the issue + run: | + gh issue create \ + --title "$MILESTONE_VERSION ROADMAP" \ + --label 'epic,impacts docs,impacts integrations,impacts cloud' \ + --body-file $ISSUE_TEMPLATE \ + --milestone $MILESTONE_VERSION + + create-changelog-issue: + needs: get-release-version + # Create the changelog issue if the release is not only a patch release + if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false' + runs-on: ubuntu-latest + env: + ISSUE_TEMPLATE: issue-template.md + steps: + - uses: actions/checkout@v3 + - name: Download the issue template + run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE + - name: Replace all empty occurrences in the templates + run: | + # Replace all <> occurrences + sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE + + # Replace all <> occurrences + milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7) + sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE + - name: Create the issue + run: | + gh issue create \ + --title "Create release changelogs for $MILESTONE_VERSION" \ + --label 'impacts docs,documentation' \ + --body-file $ISSUE_TEMPLATE \ + --milestone $MILESTONE_VERSION \ + --assignee curquiza + +# ---------------- +# MILESTONE CLOSED +# ---------------- + + create-release-label: + if: github.event.action == 'closed' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Create the ${{ env.MILESTONE_VERSION }} label + run: | + label_description="PRs/issues solved in $MILESTONE_VERSION" + if [[ ! -z $MILESTONE_DUE_ON ]]; then + date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1) + label_description="$label_description released on $date" + fi + + gh api repos/meilisearch/meilisearch/labels \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -f name="$MILESTONE_VERSION" \ + -f description="$label_description" \ + -f color='ff5ba3' + + labelize-all-milestone-content: + if: github.event.action == 'closed' + needs: create-release-label + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Add label ${{ env.MILESTONE_VERSION }} to all PRs in the Milestone + run: | + prs=$(gh pr list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}') + for pr in $prs; do + gh pr edit $pr --add-label $MILESTONE_VERSION + done + - name: Add label ${{ env.MILESTONE_VERSION }} to all issues in the Milestone + run: | + issues=$(gh issue list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}') + for issue in $issues; do + gh issue edit $issue --add-label $MILESTONE_VERSION + done diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index a9fa50223..96c43c826 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -1,4 +1,7 @@ on: + workflow_dispatch: + schedule: + - cron: '0 2 * * *' # Every day at 2:00am release: types: [published] @@ -8,13 +11,14 @@ jobs: check-version: name: Check the version validity runs-on: ubuntu-latest + # No need to check the version for dry run (cron) steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Check if the tag has the v.. format. 
# If yes, it means we are publishing an official release. # If no, we are releasing a RC, so no need to check the version. - name: Check tag format - if: github.event_name != 'schedule' + if: github.event_name == 'release' id: check-tag-format run: | escaped_tag=$(printf "%q" ${{ github.ref_name }}) @@ -25,7 +29,7 @@ jobs: echo ::set-output name=stable::false fi - name: Check release validity - if: steps.check-tag-format.outputs.stable == 'true' + if: github.event_name == 'release' && steps.check-tag-format.outputs.stable == 'true' run: bash .github/scripts/check-release.sh publish: @@ -54,14 +58,54 @@ jobs: - uses: actions/checkout@v3 - name: Build run: cargo build --release --locked + # No need to upload binaries for dry run (cron) - name: Upload binaries to release + if: github.event_name == 'release' uses: svenstaro/upload-release-action@v1-release with: - repo_token: ${{ secrets.PUBLISH_TOKEN }} + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/${{ matrix.artifact_name }} asset_name: ${{ matrix.asset_name }} tag: ${{ github.ref }} + publish-macos-apple-silicon: + name: Publish binary for macOS silicon + runs-on: ${{ matrix.os }} + needs: check-version + continue-on-error: false + strategy: + fail-fast: false + matrix: + include: + - os: macos-latest + target: aarch64-apple-darwin + asset_name: meilisearch-macos-apple-silicon + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + - name: Installing Rust toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + profile: minimal + target: ${{ matrix.target }} + override: true + - name: Cargo build + uses: actions-rs/cargo@v1 + with: + command: build + args: --release --target ${{ matrix.target }} + - name: Upload the binary to release + # No need to upload binaries for dry run (cron) + if: github.event_name == 'release' + uses: svenstaro/upload-release-action@v1-release + with: + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} + file: target/${{ matrix.target }}/release/meilisearch + asset_name: ${{ matrix.asset_name }} + tag: ${{ github.ref }} + publish-aarch64: name: Publish binary for aarch64 runs-on: ${{ matrix.os }} @@ -110,7 +154,6 @@ jobs: echo '[target.aarch64-unknown-linux-gnu]' >> ~/.cargo/config echo 'linker = "aarch64-linux-gnu-gcc"' >> ~/.cargo/config echo 'JEMALLOC_SYS_WITH_LG_PAGE=16' >> $GITHUB_ENV - echo RUSTFLAGS="-Clink-arg=-fuse-ld=gold" >> $GITHUB_ENV - name: Cargo build uses: actions-rs/cargo@v1 @@ -123,9 +166,11 @@ jobs: run: ls -lR ./target - name: Upload the binary to release + # No need to upload binaries for dry run (cron) + if: github.event_name == 'release' uses: svenstaro/upload-release-action@v1-release with: - repo_token: ${{ secrets.PUBLISH_TOKEN }} + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch asset_name: ${{ matrix.asset_name }} tag: ${{ github.ref }} diff --git a/.github/workflows/publish-deb-brew-pkg.yml b/.github/workflows/publish-deb-brew-pkg.yml index 96bf9af9e..028001dcd 100644 --- a/.github/workflows/publish-deb-brew-pkg.yml +++ b/.github/workflows/publish-deb-brew-pkg.yml @@ -9,7 +9,7 @@ jobs: name: Check the version validity runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check release validity run: bash .github/scripts/check-release.sh @@ -29,7 +29,7 @@ jobs: - name: Upload debian pkg to release uses: svenstaro/upload-release-action@v1-release with: - repo_token: ${{ secrets.GITHUB_TOKEN }} + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: 
target/debian/meilisearch.deb asset_name: meilisearch.deb tag: ${{ github.ref }} diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml index 72234fc01..ab5e7131d 100644 --- a/.github/workflows/publish-docker-images.yml +++ b/.github/workflows/publish-docker-images.yml @@ -12,7 +12,7 @@ jobs: docker: runs-on: docker steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Check if the tag has the v.. format. If yes, it means we are publishing an official release. # In this situation, we need to set `output.stable` to create/update the following tags (additionally to the `vX.Y.Z` Docker tag): @@ -53,7 +53,7 @@ jobs: uses: docker/metadata-action@v4 with: images: getmeili/meilisearch - # The lastest and `vX.Y` tags are only pushed for the official Meilisearch releases + # The latest and `vX.Y` tags are only pushed for the official Meilisearch releases # See https://github.com/docker/metadata-action#latest-tag flavor: latest=false tags: | @@ -62,10 +62,19 @@ jobs: type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' }} - name: Build and push - id: docker_build uses: docker/build-push-action@v3 with: # We do not push tags for the cron jobs, this is only for test purposes push: ${{ github.event_name != 'schedule' }} platforms: linux/amd64,linux/arm64 tags: ${{ steps.meta.outputs.tags }} + + # /!\ Don't touch this without checking with Cloud team + - name: Send CI information to Cloud team + if: github.event_name != 'schedule' + uses: peter-evans/repository-dispatch@v2 + with: + token: ${{ secrets.MEILI_BOT_GH_PAT }} + repository: meilisearch/meilisearch-cloud + event-type: cloud-docker-build + client-payload: '{ "meilisearch_version": "${{ github.ref_name }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }' diff --git a/.github/workflows/update-cargo-toml-version.yml b/.github/workflows/update-cargo-toml-version.yml new file mode 100644 index 000000000..faf3b0aaa --- /dev/null +++ b/.github/workflows/update-cargo-toml-version.yml @@ -0,0 +1,47 @@ +name: Update Meilisearch version in all Cargo.toml files + +on: + workflow_dispatch: + inputs: + new_version: + description: 'The new version (vX.Y.Z)' + required: true + +env: + NEW_VERSION: ${{ github.event.inputs.new_version }} + NEW_BRANCH: update-version-${{ github.event.inputs.new_version }} + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + +jobs: + + update-version-cargo-toml: + name: Update version in Cargo.toml files + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Install sd + run: cargo install sd + - name: Update Cargo.toml files + run: | + raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2) + new_string="version = \"$raw_new_version\"" + sd '^version = "\d+.\d+.\w+"$' "$new_string" */Cargo.toml + - name: Build Meilisearch to update Cargo.lock + run: cargo build + - name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch + uses: EndBug/add-and-commit@v9 + with: + message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml files" + new_branch: ${{ env.NEW_BRANCH }} + - name: Create the PR pointing to ${{ github.ref_name }} + run: | + gh pr create \ + --title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \ + --body '⚠️ This PR is automatically generated. Check the new version is the expected one before merging.' 
\ + --label 'skip changelog' \ + --milestone $NEW_VERSION diff --git a/.gitignore b/.gitignore index 8aa76ff15..6fc47753d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,10 @@ /data.ms /snapshots /dumps + + +# Snapshots +## ... large +*.full.snap +## ... unreviewed +*.snap.new diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 000000000..250124b77 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,5 @@ +unstable_features = true + +use_small_heuristics = "max" +imports_granularity = "Module" +group_imports = "StdExternalCrate" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1c40c7dac..a335460ab 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,12 +10,24 @@ If Meilisearch does not offer optimized support for your language, please consid ## Table of Contents +- [Hacktoberfest 2022](#hacktoberfest-2022) - [Assumptions](#assumptions) - [How to Contribute](#how-to-contribute) - [Development Workflow](#development-workflow) - [Git Guidelines](#git-guidelines) - [Release Process (for internal team only)](#release-process-for-internal-team-only) +## Hacktoberfest 2022 + +It's [Hacktoberfest month](https://hacktoberfest.com)! 🥳 + +Thanks so much for participating with Meilisearch this year! +1. We will follow the quality standards set by the organizers of Hacktoberfest (see detail on their [website](https://hacktoberfest.com/participation/#spam)). Our reviewers will not consider any PR that doesn’t match that standard. +2. PRs reviews will take place from Monday to Thursday, during usual working hours, CEST time. If you submit outside of these hours, there’s no need to panic; we will get around to your contribution. +3. There will be no issue assignment as we don’t want people to ask to be assigned specific issues and never return, discouraging the volunteer contributors from opening a PR to fix this issue. We take the liberty to choose the PR that best fixes the issue, so we encourage you to get to it as soon as possible and do your best! + +You can check out the longer, more complete guideline documentation [here](https://github.com/meilisearch/.github/blob/main/Hacktoberfest_2022_contributors_guidelines.md). + ## Assumptions 1. 
**You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.** @@ -102,7 +114,7 @@ The full Meilisearch release process is described in [this guide](https://github ### Release assets For each release, the following assets are created: -- Binaries for differents platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release +- Binaries for different platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release - Binaries are pushed to HomeBrew and APT (not published for RC) - Docker tags are created/updated: - `vX.Y.Z` diff --git a/Cargo.lock b/Cargo.lock index 33da05183..525b92976 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -21,9 +21,9 @@ dependencies = [ [[package]] name = "actix-cors" -version = "0.6.2" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02a0adcaabb68f1dfe8880cb3c5f049261c68f5d69ce06b6f3a930f31710838e" +checksum = "684a6ce1562a5fcca49bc9302896c63547eea78a1e405e837e7416affd8b6eb9" dependencies = [ "actix-utils", "actix-web", @@ -59,7 +59,7 @@ dependencies = [ "http", "httparse", "httpdate", - "itoa 1.0.3", + "itoa 1.0.4", "language-tags", "local-channel", "mime", @@ -78,21 +78,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "465a6172cf69b960917811022d8f29bc0b7fa1398bc4f78b3c466673db1213b6" dependencies = [ "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] name = "actix-router" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb60846b52c118f2f04a56cc90880a274271c489b2498623d58176f8ca21fa80" +checksum = "d66ff4d247d2b160861fa2866457e85706833527840e4133f8f49aa423a38799" dependencies = [ "bytestring", - "firestorm", "http", - "log", "regex", "serde", + "tracing", ] [[package]] @@ -155,9 +154,9 @@ dependencies = [ [[package]] name = "actix-utils" -version = "3.0.0" +version = "3.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e491cbaac2e7fc788dfff99ff48ef317e23b3cf63dbaf7aaab6418f40f92aa94" +checksum = "88a1dcdff1466e3c2488e1cb5c36a71822750ad43839937f85d2f4d9f8b705d8" dependencies = [ "local-waker", "pin-project-lite", @@ -189,7 +188,7 @@ dependencies = [ "futures-core", "futures-util", "http", - "itoa 1.0.3", + "itoa 1.0.4", "language-tags", "log", "mime", @@ -201,7 +200,7 @@ dependencies = [ "serde_urlencoded", "smallvec", "socket2", - "time 0.3.14", + "time", "url", ] @@ -212,9 +211,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa9362663c8643d67b2d5eafba49e4cb2c8a053a29ed00a0bea121f17c76b13" dependencies = [ "actix-router", - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] @@ -243,6 +242,18 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "aes" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8" +dependencies = [ + "cfg-if", + "cipher", + "cpufeatures", + "opaque-debug", +] + [[package]] name = "ahash" version = "0.3.8" @@ -286,9 +297,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.64" +version = "1.0.66" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9a8f622bcf6ff3df478e9deba3e03e4e04b300f8e6a139e192c05fa3490afc7" +checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" dependencies = [ "backtrace", ] @@ -319,20 +330,20 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] name = "async-trait" -version = "0.1.57" +version = "0.1.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76464446b8bc32758d7e88ee1a804d9914cd9b1cb264c029899680b0be29826f" +checksum = "1e805d94e6b5001b651426cf4cd446b1ab5f319d27bab5c644f61de0a804360c" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] @@ -344,12 +355,6 @@ dependencies = [ "critical-section", ] -[[package]] -name = "atomic_refcell" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b5e5f48b927f04e952dedc932f31995a65a0bf65ec971c74436e51bf6e970d" - [[package]] name = "atty" version = "0.2.14" @@ -399,9 +404,15 @@ checksum = "f8fe8f5a8a398345e52358e18ff07cc17a568fbca5c6f73873d3a62056309603" [[package]] name = "base64" -version = "0.13.0" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "base64ct" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf" [[package]] name = "big_s" @@ -502,6 +513,18 @@ dependencies = [ "serde", ] +[[package]] +name = "bstr" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fca0852af221f458706eb0725c03e4ed6c46af9ac98e6a689d5e634215d594dd" +dependencies = [ + "memchr", + "once_cell", + "regex-automata", + "serde", +] + [[package]] name = "build_const" version = "0.2.2" @@ -510,9 +533,9 @@ checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7" [[package]] name = "bumpalo" -version = "3.4.0" +version = "3.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820" +checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" [[package]] name = "byte-unit" @@ -545,9 +568,9 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9e1f5fa78f69496407a27ae9ed989e3c3b072310286f5ef385525e4cbc24a9" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] @@ -594,12 +617,11 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.11.6" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4419e9adae9fd7e231b60d50467481bf8181ddeef6ed54683b23ae925c74c9c" +checksum = "6a621d5d6d6c8d086dbaf1fe659981da41a1b63c6bdbba30b4dbb592c6d3bd49" dependencies = [ "serde", - "serde_derive", "toml", ] @@ -614,9 +636,9 @@ dependencies = [ [[package]] name = "cedarwood" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "fa312498f9f41452998d984d3deb84c84f86aeb8a2499d7505bb8106d78d147d" +checksum = "6d910bedd62c24733263d0bed247460853c9d22e8956bd4cd964302095e04e90" dependencies = [ "smallvec", ] @@ -657,9 +679,9 @@ dependencies = [ [[package]] name = "character_converter" -version = "2.1.3" +version = "2.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75387a5aa327fed13de2adb87ec4bcb351943bfb30af7004405a39da430c390" +checksum = "14eb54f15451a7095181d32b3ac148ba3684ab8dc261a74208b2063c9293bb1c" dependencies = [ "bincode", "fst", @@ -667,15 +689,24 @@ dependencies = [ ] [[package]] -name = "clap" -version = "3.2.20" +name = "cipher" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23b71c3ce99b7611011217b366d923f1d0a7e07a92bb2dbf1e84508c673ca3bd" +checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" +dependencies = [ + "generic-array", +] + +[[package]] +name = "clap" +version = "3.2.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" dependencies = [ "atty", "bitflags", - "clap_derive", - "clap_lex", + "clap_derive 3.2.18", + "clap_lex 0.2.4", "indexmap", "once_cell", "strsim", @@ -683,6 +714,21 @@ dependencies = [ "textwrap", ] +[[package]] +name = "clap" +version = "4.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335867764ed2de42325fafe6d18b8af74ba97ee0c590fa016f157535b42ab04b" +dependencies = [ + "atty", + "bitflags", + "clap_derive 4.0.18", + "clap_lex 0.3.0", + "once_cell", + "strsim", + "termcolor", +] + [[package]] name = "clap_derive" version = "3.2.18" @@ -691,9 +737,22 @@ checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" dependencies = [ "heck", "proc-macro-error", - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", +] + +[[package]] +name = "clap_derive" +version = "4.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16a1b0f6422af32d5da0c58e2703320f379216ee70198241c84173a8c5ac28f3" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2 1.0.47", + "quote 1.0.21", + "syn 1.0.103", ] [[package]] @@ -705,17 +764,45 @@ dependencies = [ "os_str_bytes", ] +[[package]] +name = "clap_lex" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d4198f73e42b4936b35b5bb248d81d2b595ecb170da0bac7655c54eedfa8da8" +dependencies = [ + "os_str_bytes", +] + [[package]] name = "concat-arrays" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1df715824eb382e34b7afb7463b0247bf41538aeba731fba05241ecdb5dc3747" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] +[[package]] +name = "console" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c050367d967ced717c04b65d8c619d863ef9292ce0c5760028655a2fb298718c" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "terminal_size", + "winapi", +] + +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + [[package]] name = "convert_case" version = "0.4.0" @@ -724,12 +811,12 @@ checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" 
[[package]] name = "cookie" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94d4706de1b0fa5b132270cddffa8585166037822e260a944fe161acd137ca05" +checksum = "344adc371239ef32293cb1c4fe519592fcf21206c79c02854320afcdf3ab4917" dependencies = [ "percent-encoding", - "time 0.3.14", + "time", "version_check", ] @@ -796,6 +883,20 @@ dependencies = [ "riscv", ] +[[package]] +name = "crossbeam" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" +dependencies = [ + "cfg-if", + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + [[package]] name = "crossbeam-channel" version = "0.5.6" @@ -819,15 +920,14 @@ dependencies = [ [[package]] name = "crossbeam-epoch" -version = "0.9.10" +version = "0.9.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1" +checksum = "f916dfc5d356b0ed9dae65f1db9fc9770aa2851d2662b988ccf4fe3516e86348" dependencies = [ "autocfg", "cfg-if", "crossbeam-utils", "memoffset", - "once_cell", "scopeguard", ] @@ -843,12 +943,11 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc" +checksum = "edbafec5fa1f196ca66527c1b12c2ec4745ca14b50f1ad8f9f6f720b55d11fac" dependencies = [ "cfg-if", - "once_cell", ] [[package]] @@ -867,7 +966,7 @@ version = "1.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" dependencies = [ - "bstr", + "bstr 0.2.17", "csv-core", "itoa 0.4.8", "ryu", @@ -884,14 +983,69 @@ dependencies = [ ] [[package]] -name = "derivative" -version = "2.2.0" +name = "darling" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +checksum = "4529658bdda7fd6769b8614be250cdcfc3aeb0ee72fe66f9e41e5e5eb73eac02" dependencies = [ - "proc-macro2 1.0.43", + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "649c91bc01e8b1eac09fb91e8dbc7d517684ca6be8ebc75bb9cafc894f9fdb6f" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "strsim", + "syn 1.0.103", +] + +[[package]] +name = "darling_macro" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc69c5bfcbd2fc09a0f38451d2daf0e372e367986a83906d1b0dbc88134fb5" +dependencies = [ + "darling_core", + "quote 1.0.21", + "syn 1.0.103", +] + +[[package]] +name = "derive_builder" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07adf7be193b71cc36b193d0f5fe60b918a3a9db4dad0449f57bcfd519704a3" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f91d4cfa921f1c05904dc3c57b4a32c38aed3340cce209f3a6fd1478babafc4" +dependencies = [ + "darling", + "proc-macro2 1.0.47", + "quote 1.0.21", + "syn 1.0.103", +] + +[[package]] +name = 
"derive_builder_macro" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f0314b72bed045f3a68671b3c86328386762c93f82d98c65c3cb5e5f573dd68" +dependencies = [ + "derive_builder_core", + "syn 1.0.103", ] [[package]] @@ -901,10 +1055,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" dependencies = [ "convert_case", - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", "rustc_version 0.4.0", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] @@ -913,17 +1067,11 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08ff6a4480d42625e59bc4e8b5dc3723279fd24d83afe8aa20df217276261cd6" -[[package]] -name = "difflib" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" - [[package]] name = "digest" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" +checksum = "adfbc57365a37acbd2ebf2b64d7e69bb766e2fea813521ed536f5d0520dcf86c" dependencies = [ "block-buffer", "crypto-common", @@ -952,10 +1100,29 @@ dependencies = [ ] [[package]] -name = "downcast" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1435fa1053d8b2fbbe9be7e97eca7f33d37b28409959813daefc1446a14247f1" +name = "dump" +version = "0.30.0" +dependencies = [ + "anyhow", + "big_s", + "flate2", + "http", + "log", + "maplit", + "meili-snap", + "meilisearch-auth", + "meilisearch-types", + "once_cell", + "regex", + "roaring", + "serde", + "serde_json", + "tar", + "tempfile", + "thiserror", + "time", + "uuid 1.2.1", +] [[package]] name = "either" @@ -976,6 +1143,12 @@ dependencies = [ "void", ] +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "encoding" version = "0.2.33" @@ -1049,33 +1222,13 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "enum-iterator" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eeac5c5edb79e4e39fe8439ef35207780a11f69c52cbe424ce3dfad4cb78de6" -dependencies = [ - "enum-iterator-derive 0.7.0", -] - [[package]] name = "enum-iterator" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45a0ac4aeb3a18f92eaf09c6bb9b3ac30ff61ca95514fc58cbead1c9a6bf5401" dependencies = [ - "enum-iterator-derive 1.1.0", -] - -[[package]] -name = "enum-iterator-derive" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c134c37760b27a871ba422106eedbb8247da973a09e82558bf26d619c882b159" -dependencies = [ - "proc-macro2 1.0.43", - "quote 1.0.21", - "syn 1.0.99", + "enum-iterator-derive", ] [[package]] @@ -1084,16 +1237,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "828de45d0ca18782232dfb8f3ea9cc428e8ced380eb26a520baaacfc70de39ce" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] name = "env_logger" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0b2cf0344971ee6c64c31be0d530793fba457d322dfec2810c453d0ef228f9c3" +checksum = "c90bf5f19754d10198ccb95b70664fc925bd1fc090a0fd9a6ebc54acc8cd6272" dependencies = [ "atty", "humantime", @@ -1102,6 +1255,27 @@ dependencies = [ "termcolor", ] +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "fastrand" version = "1.8.0" @@ -1112,32 +1286,59 @@ dependencies = [ ] [[package]] -name = "filetime" -version = "0.2.17" +name = "faux" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c" +checksum = "7c3b5e56a69ca67c241191cd9d484e14fb0fe89f5e539c2e8448eafd1f65c1f0" +dependencies = [ + "faux_macros", + "paste", +] + +[[package]] +name = "faux_macros" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35c9bb4a2c13ffb3a93a39902aaf4e7190a1706a4779b6db0449aee433d26c4a" +dependencies = [ + "darling", + "proc-macro2 1.0.47", + "quote 1.0.21", + "syn 1.0.103", + "uuid 0.8.2", +] + +[[package]] +name = "file-store" +version = "0.30.0" +dependencies = [ + "faux", + "tempfile", + "thiserror", + "uuid 1.2.1", +] + +[[package]] +name = "filetime" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9663d381d07ae25dc88dbdf27df458faa83a9b25336bcac83d5e452b5fc9d3" dependencies = [ "cfg-if", "libc", "redox_syscall", - "windows-sys", + "windows-sys 0.42.0", ] [[package]] name = "filter-parser" -version = "0.33.4" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.5#4fc6331cb6526c07f3137584564cfe3493fb25bd" +version = "0.37.0" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.0#57c9f03e514436a2cca799b2a28cd89247682be0" dependencies = [ "nom", "nom_locate", ] -[[package]] -name = "firestorm" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c5f6c2c942da57e2aaaa84b8a521489486f14e75e7fa91dab70aba913975f98" - [[package]] name = "flate2" version = "1.0.24" @@ -1150,21 +1351,12 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "0.33.4" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.5#4fc6331cb6526c07f3137584564cfe3493fb25bd" +version = "0.37.0" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.0#57c9f03e514436a2cca799b2a28cd89247682be0" dependencies = [ "serde_json", ] -[[package]] -name = "float-cmp" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" -dependencies = [ - "num-traits", -] - [[package]] name = "fnv" version = "1.0.7" @@ -1180,18 +1372,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fragile" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85dcb89d2b10c5f6133de2efd8c11959ce9dbb46a2f7a4cab208c4eeda6ce1ab" - -[[package]] -name = "fs_extra" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" - [[package]] name = "fst" version = "0.4.7" @@ -1200,9 +1380,9 @@ checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" [[package]] name = "futures" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f21eda599937fba36daeb58a22e8f5cee2d14c4a17b5b7739c7c8e5e3b8230c" +checksum = "38390104763dc37a5145a53c29c63c1290b5d316d6086ec32c293f6736051bb0" dependencies = [ "futures-channel", "futures-core", @@ -1215,9 +1395,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30bdd20c28fadd505d0fd6712cdfcb0d4b5648baf45faef7f852afb2399bb050" +checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed" dependencies = [ "futures-core", "futures-sink", @@ -1225,15 +1405,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e5aa3de05362c3fb88de6531e6296e85cde7739cccad4b9dfeeb7f6ebce56bf" +checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" [[package]] name = "futures-executor" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ff63c23854bee61b6e9cd331d523909f238fc7636290b96826e9cfa5faa00ab" +checksum = "7acc85df6714c176ab5edf386123fafe217be88c0840ec11f199441134a074e2" dependencies = [ "futures-core", "futures-task", @@ -1242,38 +1422,38 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbf4d2a7a308fd4578637c0b17c7e1c7ba127b8f6ba00b29f717e9655d85eb68" +checksum = "00f5fb52a06bdcadeb54e8d3671f8888a39697dcb0b81b23b55174030427f4eb" [[package]] name = "futures-macro" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42cd15d1c7456c04dbdf7e88bcd69760d74f3a798d6444e16974b505b0e62f17" +checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] name = "futures-sink" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b20ba5a92e727ba30e72834706623d94ac93a725410b6a6b6fbc1b07f7ba56" +checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9" [[package]] name = "futures-task" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6508c467c73851293f390476d4491cf4d227dbabcd4170f3bb6044959b294f1" +checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea" [[package]] name = "futures-util" -version = "0.3.24" +version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44fb6cb1be61cc1d2e43b262516aafcf63b241cffdb1d3fa115f91d9c7b09c90" +checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6" dependencies = [ "futures-channel", "futures-core", @@ -1308,19 +1488,19 @@ dependencies = [ [[package]] name = "geoutils" -version = "0.4.1" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9e006f616a407d396ace1d2ebb3f43ed73189db8b098079bd129928d7645dd1e" +checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad" [[package]] name = "getrandom" -version = "0.2.7" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] @@ -1330,9 +1510,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e45727250e75cc04ff2846a66397da8ef2b3db8e40e0cef4df67950a07621eb9" dependencies = [ "proc-macro-error", - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] @@ -1362,9 +1542,9 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "grenad" -version = "0.4.2" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e8454188b8caee0627ff58636048963b6abd07e5862b4c9a8f9cfd349d50c26" +checksum = "5232b2d157b7bf63d7abe1b12177039e58db2f29e377517c0cdee1578cca4c93" dependencies = [ "bytemuck", "byteorder", @@ -1373,9 +1553,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.3.14" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ca32592cf21ac7ccab1825cd87f6c9b3d9022c44d086172ed0966bec8af30be" +checksum = "5f9f29bc9dda355256b2916cf526ab02ce0aeaaaf2bad60d65ef3f12f11dd0f4" dependencies = [ "bytes", "fnv", @@ -1409,12 +1589,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" - [[package]] name = "hashbrown" version = "0.12.3" @@ -1442,8 +1616,8 @@ checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" [[package]] name = "heed" -version = "0.12.2" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.3#076971765f4ce09591ed7e19e45ea817580a53e3" +version = "0.12.4" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.4#7a4542bc72dd60ef0f508c89900ea292218223fb" dependencies = [ "byteorder", "heed-traits", @@ -1460,12 +1634,12 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.7.0" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.3#076971765f4ce09591ed7e19e45ea817580a53e3" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.4#7a4542bc72dd60ef0f508c89900ea292218223fb" [[package]] name = "heed-types" version = "0.7.2" -source = "git+https://github.com/meilisearch/heed?tag=v0.12.3#076971765f4ce09591ed7e19e45ea817580a53e3" +source = "git+https://github.com/meilisearch/heed?tag=v0.12.4#7a4542bc72dd60ef0f508c89900ea292218223fb" dependencies = [ "bincode", "heed-traits", @@ -1506,7 +1680,7 @@ checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" dependencies = [ "bytes", "fnv", - "itoa 1.0.3", + "itoa 1.0.4", ] [[package]] @@ -1553,7 +1727,7 @@ dependencies = [ "http-body", "httparse", "httpdate", - "itoa 1.0.3", + "itoa 1.0.4", "pin-project-lite", "socket2", "tokio", @@ -1575,6 +1749,12 @@ dependencies = [ "tokio-rustls", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + 
[[package]] name = "idna" version = "0.3.0" @@ -1585,6 +1765,34 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "index-scheduler" +version = "0.30.0" +dependencies = [ + "anyhow", + "big_s", + "bincode", + "crossbeam", + "csv", + "derive_builder", + "dump", + "enum-iterator", + "file-store", + "insta", + "log", + "meili-snap", + "meilisearch-types", + "nelson", + "roaring", + "serde", + "serde_json", + "synchronoise", + "tempfile", + "thiserror", + "time", + "uuid 1.2.1", +] + [[package]] name = "indexmap" version = "1.9.1" @@ -1596,6 +1804,22 @@ dependencies = [ "serde", ] +[[package]] +name = "insta" +version = "1.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581d4e3314cae4536e5d22ffd23189d4a374696c5ef733eadafae0ed273fd303" +dependencies = [ + "console", + "lazy_static", + "linked-hash-map", + "pest", + "pest_derive", + "serde", + "similar", + "yaml-rust", +] + [[package]] name = "instant" version = "0.1.12" @@ -1605,6 +1829,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "io-lifetimes" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e481ccbe3dea62107216d0d1138bb8ad8e5e5c43009a098bd1990272c497b0" + [[package]] name = "ipnet" version = "2.5.0" @@ -1613,9 +1843,9 @@ checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b" [[package]] name = "itertools" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] @@ -1628,19 +1858,19 @@ checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" [[package]] name = "itoa" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" +checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" [[package]] name = "jieba-rs" -version = "0.6.6" +version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c7e12f50325401dde50c29ca32cff44bae20873135b39f4e19ecf305226dd80" +checksum = "37228e06c75842d1097432d94d02f37fe3ebfca9791c2e8fef6e9db17ed128c1" dependencies = [ "cedarwood", "fxhash", - "hashbrown 0.11.2", + "hashbrown 0.12.3", "lazy_static", "phf", "phf_codegen", @@ -1649,26 +1879,26 @@ dependencies = [ [[package]] name = "jobserver" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b" dependencies = [ "libc", ] [[package]] name = "js-sys" -version = "0.3.55" +version = "0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" +checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" dependencies = [ "wasm-bindgen", ] [[package]] name = "json-depth-checker" -version = "0.33.4" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.5#4fc6331cb6526c07f3137584564cfe3493fb25bd" +version = "0.37.0" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.0#57c9f03e514436a2cca799b2a28cd89247682be0" dependencies = [ "serde_json", ] @@ -1710,9 +1940,9 @@ 
dependencies = [ [[package]] name = "libc" -version = "0.2.132" +version = "0.2.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" +checksum = "fc7fcc620a3bff7cdd7a365be3376c97191aeaccc2a603e600951e452615bf89" [[package]] name = "libgit2-sys" @@ -1734,9 +1964,9 @@ checksum = "292a948cd991e376cf75541fe5b97a1081d713c618b4f1b9500f8844e49eb565" [[package]] name = "libmimalloc-sys" -version = "0.1.25" +version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11ca136052550448f55df7898c6dbe651c6b574fe38a0d9ea687a9f8088a2e2c" +checksum = "8fc093ab289b0bfda3aa1bdfab9c9542be29c7ef385cfcbe77f8c9813588eb48" dependencies = [ "cc", ] @@ -1784,7 +2014,7 @@ dependencies = [ "anyhow", "bincode", "byteorder", - "clap", + "clap 3.2.23", "csv", "encoding", "env_logger", @@ -1859,7 +2089,7 @@ dependencies = [ "anyhow", "bincode", "byteorder", - "clap", + "clap 3.2.23", "encoding", "env_logger", "glob", @@ -1879,7 +2109,7 @@ dependencies = [ "anyhow", "bincode", "byteorder", - "clap", + "clap 3.2.23", "csv", "encoding", "env_logger", @@ -1899,7 +2129,7 @@ dependencies = [ "anyhow", "bincode", "byteorder", - "clap", + "clap 3.2.23", "csv", "encoding", "env_logger", @@ -1910,10 +2140,22 @@ dependencies = [ "yada", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + +[[package]] +name = "linux-raw-sys" +version = "0.0.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4d2456c373231a208ad294c33dc5bff30051eafd954cd4caae83a712b12854d" + [[package]] name = "lmdb-rkv-sys" -version = "0.15.0" -source = "git+https://github.com/meilisearch/lmdb-rs#d0b50d02938ee84e4e4372697ea991fe2a4cae3b" +version = "0.15.1" +source = "git+https://github.com/meilisearch/lmdb-rs#5592bf5a812905cf0c633404ef8f8f4057112c65" dependencies = [ "cc", "libc", @@ -1940,9 +2182,9 @@ checksum = "e34f76eb3611940e0e7d53a9aaa4e6a3151f69541a282fd0dad5571420c53ff1" [[package]] name = "lock_api" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f80bf5aacaf25cbfc8210d1cfb718f2bf3b11c4c54e5afe36c236853a8ec390" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" dependencies = [ "autocfg", "scopeguard", @@ -1974,9 +2216,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "10a9062912d7952c5588cc474795e0b9ee008e7e6781127945b85413d4b99d81" dependencies = [ "log", - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] @@ -1996,9 +2238,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f08150cf2bab1fc47c2196f4f41173a27fcd0f684165e5458c0046b53a472e2f" dependencies = [ "once_cell", - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] @@ -2008,27 +2250,43 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" [[package]] -name = "meilisearch-auth" -version = "0.29.2" +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + +[[package]] +name = "meili-snap" +version = "0.30.0" 
dependencies = [ - "enum-iterator 0.7.0", + "insta", + "md5", + "once_cell", +] + +[[package]] +name = "meilisearch-auth" +version = "0.30.0" +dependencies = [ + "enum-iterator", "hmac", "meilisearch-types", - "milli", "rand", + "roaring", "serde", "serde_json", "sha2", "thiserror", - "time 0.3.14", - "uuid", + "time", + "uuid 1.2.1", ] [[package]] name = "meilisearch-http" -version = "0.29.2" +version = "0.30.0" dependencies = [ "actix-cors", + "actix-http", "actix-rt", "actix-web", "actix-web-static-files", @@ -2036,20 +2294,24 @@ dependencies = [ "assert-json-diff", "async-stream", "async-trait", - "bstr", + "brotli", + "bstr 1.0.1", "byte-unit", "bytes", "cargo_toml", - "clap", + "clap 4.0.18", "crossbeam-channel", + "dump", "either", "env_logger", + "file-store", "flate2", "fst", "futures", "futures-util", "hex", "http", + "index-scheduler", "indexmap", "itertools", "jsonwebtoken", @@ -2057,8 +2319,8 @@ dependencies = [ "log", "manifest-dir-macros", "maplit", + "meili-snap", "meilisearch-auth", - "meilisearch-lib", "meilisearch-types", "mimalloc", "mime", @@ -2066,6 +2328,7 @@ dependencies = [ "obkv", "once_cell", "parking_lot", + "permissive-json-pointer", "pin-project-lite", "platform-dirs", "prometheus", @@ -2074,7 +2337,7 @@ dependencies = [ "regex", "reqwest", "rustls", - "rustls-pemfile 0.3.0", + "rustls-pemfile", "segment", "serde", "serde-cs", @@ -2086,13 +2349,15 @@ dependencies = [ "static-files", "sysinfo", "tar", + "temp-env", "tempfile", "thiserror", - "time 0.3.14", + "time", "tokio", "tokio-stream", + "toml", "urlencoding", - "uuid", + "uuid 1.2.1", "vergen", "walkdir", "yaup", @@ -2100,77 +2365,29 @@ dependencies = [ ] [[package]] -name = "meilisearch-lib" -version = "0.29.2" +name = "meilisearch-types" +version = "0.30.0" dependencies = [ - "actix-rt", "actix-web", "anyhow", - "async-stream", - "async-trait", - "atomic_refcell", - "byte-unit", - "bytes", - "clap", - "crossbeam-channel", "csv", - "derivative", "either", + "enum-iterator", "flate2", - "fs_extra", "fst", - "futures", - "futures-util", - "http", - "indexmap", - "itertools", - "lazy_static", - "log", - "meilisearch-auth", - "meilisearch-types", + "insta", + "meili-snap", "milli", - "mime", - "mockall", - "nelson", - "num_cpus", - "obkv", - "once_cell", - "page_size", - "parking_lot", - "paste", - "permissive-json-pointer", "proptest", "proptest-derive", - "rand", - "rayon", - "regex", - "reqwest", "roaring", - "rustls", "serde", "serde_json", - "siphasher", - "slice-group-by", - "sysinfo", "tar", - "tempfile", "thiserror", - "time 0.3.14", + "time", "tokio", - "uuid", - "walkdir", - "whoami", -] - -[[package]] -name = "meilisearch-types" -version = "0.29.2" -dependencies = [ - "actix-web", - "proptest", - "proptest-derive", - "serde", - "serde_json", + "uuid 1.2.1", ] [[package]] @@ -2199,12 +2416,12 @@ dependencies = [ [[package]] name = "milli" -version = "0.33.4" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.5#4fc6331cb6526c07f3137584564cfe3493fb25bd" +version = "0.37.0" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.37.0#57c9f03e514436a2cca799b2a28cd89247682be0" dependencies = [ "bimap", "bincode", - "bstr", + "bstr 1.0.1", "byteorder", "charabia", "concat-arrays", @@ -2238,15 +2455,15 @@ dependencies = [ "smartstring", "tempfile", "thiserror", - "time 0.3.14", - "uuid", + "time", + "uuid 1.2.1", ] [[package]] name = "mimalloc" -version = "0.1.29" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2f64ad83c969af2e732e907564deb0d0ed393cec4af80776f77dd77a1a427698" +checksum = "76ce6a4b40d3bff9eb3ce9881ca0737a85072f9f975886082640cd46a75cdb35" dependencies = [ "libmimalloc-sys", ] @@ -2284,41 +2501,14 @@ dependencies = [ [[package]] name = "mio" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" +checksum = "e5d732bc30207a6423068df043e3d02e0735b155ad7ce1a6f76fe2baa5b158de" dependencies = [ "libc", "log", - "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys", -] - -[[package]] -name = "mockall" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2be9a9090bc1cac2930688fa9478092a64c6a92ddc6ae0692d46b37d9cab709" -dependencies = [ - "cfg-if", - "downcast", - "fragile", - "lazy_static", - "mockall_derive", - "predicates", - "predicates-tree", -] - -[[package]] -name = "mockall_derive" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86d702a0530a0141cf4ed147cf5ec7be6f2c187d4e37fcbefc39cf34116bfe8f" -dependencies = [ - "cfg-if", - "proc-macro2 1.0.43", - "quote 1.0.21", - "syn 1.0.99", + "wasi", + "windows-sys 0.42.0", ] [[package]] @@ -2362,17 +2552,11 @@ dependencies = [ "nom", ] -[[package]] -name = "normalize-line-endings" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" - [[package]] name = "ntapi" -version = "0.3.7" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f" +checksum = "bc51db7b362b205941f71232e56c625156eb9a929f8cf74a428fd5bc094a4afc" dependencies = [ "winapi", ] @@ -2444,15 +2628,21 @@ checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385" [[package]] name = "once_cell" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f7254b99e31cad77da24b08ebf628882739a608578bb1bcdfc1f9c21260d7c0" +checksum = "e82dad04139b71a90c080c8463fe0dc7902db5192d939bd0950f074d014339e1" + +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" [[package]] name = "ordered-float" -version = "2.10.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7940cf2ca942593318d07fcf2596cdca60a85c9e7fab408a5e21a4f9dcd40d87" +checksum = "1f74e330193f90ec45e2b257fa3ef6df087784157ac1ad2c1e71c62837b03aa7" dependencies = [ "num-traits", ] @@ -2485,15 +2675,26 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +checksum = "4dc9e0dc2adc1c69d09143aff38d3d30c5c3f0df0dad82e6d25547af174ebec0" dependencies = [ "cfg-if", "libc", "redox_syscall", "smallvec", - "windows-sys", + "windows-sys 0.42.0", +] + +[[package]] +name = "password-hash" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" +dependencies = [ + "base64ct", + "rand_core", + "subtle", ] [[package]] @@ -2517,6 +2718,18 @@ version = "0.1.5" source 
= "registry+https://github.com/rust-lang/crates.io-index" checksum = "498a099351efa4becc6a19c72aa9270598e8fd274ca47052e37455241c88b696" +[[package]] +name = "pbkdf2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" +dependencies = [ + "digest", + "hmac", + "password-hash", + "sha2", +] + [[package]] name = "pem" version = "1.1.0" @@ -2534,26 +2747,70 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" [[package]] name = "permissive-json-pointer" -version = "0.29.2" +version = "0.30.0" dependencies = [ "big_s", "serde_json", ] [[package]] -name = "phf" -version = "0.10.1" +name = "pest" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +checksum = "dbc7bc69c062e492337d74d59b120c274fd3d261b6bf6d3207d499b4b379c41a" +dependencies = [ + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b75706b9642ebcb34dab3bc7750f811609a0eb1dd8b88c2d15bf628c1c65b2" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f9272122f5979a6511a749af9db9bfc810393f63119970d7085fed1c4ea0db" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2 1.0.47", + "quote 1.0.21", + "syn 1.0.103", +] + +[[package]] +name = "pest_meta" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c8717927f9b79515e565a64fe46c38b8cd0427e64c40680b14a7365ab09ac8d" +dependencies = [ + "once_cell", + "pest", + "sha1", +] + +[[package]] +name = "phf" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "928c6535de93548188ef63bb7c4036bd415cd8f36ad25af44b9789b2ee72a48c" dependencies = [ "phf_shared", ] [[package]] name = "phf_codegen" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +checksum = "a56ac890c5e3ca598bbdeaa99964edb5b0258a583a9eb6ef4e89fc85d9224770" dependencies = [ "phf_generator", "phf_shared", @@ -2561,9 +2818,9 @@ dependencies = [ [[package]] name = "phf_generator" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +checksum = "b1181c94580fa345f50f19d738aaa39c0ed30a600d95cb2d3e23f94266f14fbf" dependencies = [ "phf_shared", "rand", @@ -2571,9 +2828,9 @@ dependencies = [ [[package]] name = "phf_shared" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +checksum = "e1fb5f6f826b772a8d4c0394209441e7d37cbbb967ae9c7e0e8134365c9ee676" dependencies = [ "siphasher", ] @@ -2592,9 +2849,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.25" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" +checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" 
[[package]] name = "platform-dirs" @@ -2611,36 +2868,6 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" -[[package]] -name = "predicates" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5aab5be6e4732b473071984b3164dbbfb7a3674d30ea5ff44410b6bcd960c3c" -dependencies = [ - "difflib", - "float-cmp", - "itertools", - "normalize-line-endings", - "predicates-core", - "regex", -] - -[[package]] -name = "predicates-core" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da1c2388b1513e1b605fcec39a95e0a9e8ef088f71443ef37099fa9ae6673fcb" - -[[package]] -name = "predicates-tree" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d86de6de25020a36c6d3643a86d9a6a9f552107c0559c60ea03551b5e16c032" -dependencies = [ - "predicates-core", - "termtree", -] - [[package]] name = "proc-macro-error" version = "1.0.4" @@ -2648,9 +2875,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" dependencies = [ "proc-macro-error-attr", - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", "version_check", ] @@ -2660,7 +2887,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", "version_check", ] @@ -2676,31 +2903,31 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.43" +version = "1.0.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" dependencies = [ "unicode-ident", ] [[package]] name = "procfs" -version = "0.12.0" +version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0941606b9934e2d98a3677759a971756eb821f75764d0e0d26946d08e74d9104" +checksum = "2dfb6451c91904606a1abe93e83a8ec851f45827fa84273f256ade45dc095818" dependencies = [ "bitflags", "byteorder", "hex", "lazy_static", - "libc", + "rustix", ] [[package]] name = "prometheus" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c8babc29389186697fe5a2a4859d697825496b83db5d0b65271cdc0488e88c" +checksum = "449811d15fbdf5ceb5c1144416066429cf82316e2ec8ce0c1f6f8a02e7bbcf8c" dependencies = [ "cfg-if", "fnv", @@ -2746,9 +2973,9 @@ dependencies = [ [[package]] name = "protobuf" -version = "2.27.1" +version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf7e6d18738ecd0902d30d1ad232c9125985a3422929b16c65517b38adc14f96" +checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" [[package]] name = "quick-error" @@ -2777,7 +3004,7 @@ version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", ] [[package]] @@ -2803,9 +3030,9 @@ dependencies = [ [[package]] name = "rand_core" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] @@ -2897,9 +3124,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.11.11" +version = "0.11.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b75aa69a3f06bbcc66ede33af2af253c6f7a86b1ca0033f60c580a27074fbf92" +checksum = "431949c384f4e2ae07605ccaa56d1d9d2ecdb5cadd4f9577ccfab29f2e5149fc" dependencies = [ "base64", "bytes", @@ -2913,13 +3140,13 @@ dependencies = [ "hyper-rustls", "ipnet", "js-sys", - "lazy_static", "log", "mime", + "once_cell", "percent-encoding", "pin-project-lite", "rustls", - "rustls-pemfile 1.0.1", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", @@ -2936,9 +3163,9 @@ dependencies = [ [[package]] name = "retain_mut" -version = "0.1.9" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" +checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" [[package]] name = "ring" @@ -2978,13 +3205,14 @@ dependencies = [ [[package]] name = "roaring" -version = "0.9.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd539cab4e32019956fe7e0cf160bb6d4802f4be2b52c4253d76d3bb0f85a5f7" +checksum = "ef0fb5e826a8bde011ecae6a8539dd333884335c57ff0f003fbe27c25bbe8f71" dependencies = [ "bytemuck", "byteorder", "retain_mut", + "serde", ] [[package]] @@ -3020,14 +3248,28 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" dependencies = [ - "semver 1.0.13", + "semver 1.0.14", +] + +[[package]] +name = "rustix" +version = "0.35.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "985947f9b6423159c4726323f373be0a21bdb514c5af06a849cb3d2dce2d01e8" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys 0.36.1", ] [[package]] name = "rustls" -version = "0.20.6" +version = "0.20.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aab8ee6c7097ed6057f43c187a62418d0c05a4bd5f18b3571db50ee0f9ce033" +checksum = "539a2bfe908f471bfa933876bd1eb6a19cf2176d375f82ef7f99530a40e48c2c" dependencies = [ "log", "ring", @@ -3035,15 +3277,6 @@ dependencies = [ "webpki", ] -[[package]] -name = "rustls-pemfile" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee86d63972a7c661d1536fefe8c3c8407321c3df668891286de28abcd087360" -dependencies = [ - "base64", -] - [[package]] name = "rustls-pemfile" version = "1.0.1" @@ -3104,16 +3337,16 @@ dependencies = [ [[package]] name = "segment" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c14967a911a216177366bac6dfa1209b597e311a32360431c63526e27b814fb" +checksum = "24fc91c898e0487ff3e471d0849bbaf7d38a00ff5e3531009d386b0bab9b6b12" dependencies = [ "async-trait", "reqwest", "serde", "serde_json", "thiserror", - "time 0.3.14", + "time", ] [[package]] @@ -3127,9 +3360,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93f6841e709003d68bb2deee8c343572bf446003ec20a583e76f7b15cebf3711" +checksum = 
"e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4" [[package]] name = "semver-parser" @@ -3139,9 +3372,9 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" [[package]] name = "serde" -version = "1.0.144" +version = "1.0.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860" +checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" dependencies = [ "serde_derive", ] @@ -3157,23 +3390,23 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.144" +version = "1.0.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00" +checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] name = "serde_json" -version = "1.0.85" +version = "1.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" +checksum = "6ce777b7b150d76b9cf60d28b55f5847135a003f7d7350c6be7a773508ce7d45" dependencies = [ "indexmap", - "itoa 1.0.3", + "itoa 1.0.4", "ryu", "serde", ] @@ -3185,7 +3418,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" dependencies = [ "form_urlencoded", - "itoa 1.0.3", + "itoa 1.0.4", "ryu", "serde", ] @@ -3203,9 +3436,9 @@ dependencies = [ [[package]] name = "sha1" -version = "0.10.4" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "006769ba83e921b3085caa8334186b00cf92b4cb1a6cf4632fbccc8eff5c7549" +checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" dependencies = [ "cfg-if", "cpufeatures", @@ -3214,9 +3447,9 @@ dependencies = [ [[package]] name = "sha2" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9db03534dff993187064c4e0c05a5708d2a9728ace9a8959b77bedf415dac5" +checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" dependencies = [ "cfg-if", "cpufeatures", @@ -3232,6 +3465,12 @@ dependencies = [ "libc", ] +[[package]] +name = "similar" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62ac7f900db32bf3fd12e0117dd3dc4da74bc52ebaac97f39668446d89694803" + [[package]] name = "simple_asn1" version = "0.6.2" @@ -3241,7 +3480,7 @@ dependencies = [ "num-bigint", "num-traits", "thiserror", - "time 0.3.14", + "time", ] [[package]] @@ -3277,9 +3516,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" [[package]] name = "smartstring" @@ -3365,11 +3604,11 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.99" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" +checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", 
"unicode-ident", ] @@ -3389,17 +3628,17 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", - "unicode-xid 0.2.3", + "syn 1.0.103", + "unicode-xid 0.2.4", ] [[package]] name = "sysinfo" -version = "0.23.13" +version = "0.26.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3977ec2e0520829be45c8a2df70db2bf364714d8a748316a10c3c35d4d2b01c9" +checksum = "c6d0dedf2e65d25b365c588382be9dc3a3ee4b0ed792366cf722d174c359d948" dependencies = [ "cfg-if", "core-foundation-sys", @@ -3421,6 +3660,15 @@ dependencies = [ "xattr", ] +[[package]] +name = "temp-env" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a30d48359f77fbb6af3d7b928cc2d092e1dc90b44f397e979ef08ae15733ed65" +dependencies = [ + "once_cell", +] + [[package]] name = "tempfile" version = "3.3.0" @@ -3445,66 +3693,69 @@ dependencies = [ ] [[package]] -name = "termtree" -version = "0.2.4" +name = "terminal_size" +version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507e9898683b6c43a9aa55b64259b721b52ba226e0f3779137e50ad114a4c90b" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] [[package]] name = "textwrap" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" +checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.34" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c1b05ca9d106ba7d2e31a9dab4a64e7be2cce415321966ea3132c49a656e252" +checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.34" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8f2591983642de85c921015f3f070c665a197ed69e417af436115e3a1407487" +checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] name = "time" -version = "0.1.44" +version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +checksum = "0fab5c8b9980850e06d92ddbe3ab839c062c801f3927c0fb8abd6fc8e918fbca" dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c3f9a28b618c3a6b9251b6908e9c99e04b9e5c02e6581ccbb67d59c34ef7f9b" -dependencies = [ - "itoa 1.0.3", + "itoa 1.0.4", "libc", "num_threads", "serde", + "time-core", "time-macros", ] [[package]] -name = "time-macros" -version = "0.2.4" +name = "time-core" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" +checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd" + +[[package]] +name = "time-macros" +version = "0.2.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bb801831d812c562ae7d2bfb531f26e66e4e1f6b17307ba4149c5064710e5b" +dependencies = [ + "time-core", +] [[package]] name = "tinyvec" @@ -3523,9 +3774,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" [[package]] name = "tokio" -version = "1.21.0" +version = "1.21.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89797afd69d206ccd11fb0ea560a44bbb87731d020670e79416d442919257d42" +checksum = "a9e03c497dc955702ba729190dc4aac6f2a0ce97f913e5b1b5912fc5039d9099" dependencies = [ "autocfg", "bytes", @@ -3533,7 +3784,6 @@ dependencies = [ "memchr", "mio", "num_cpus", - "once_cell", "parking_lot", "pin-project-lite", "signal-hook-registry", @@ -3548,9 +3798,9 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", ] [[package]] @@ -3566,9 +3816,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.9" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df54d54117d6fdc4e4fea40fe1e4e566b3505700e148a6827e59b34b0d2600d9" +checksum = "d660770404473ccd7bc9f8b28494a811bc18542b915c0855c51e8f419d5223ce" dependencies = [ "futures-core", "pin-project-lite", @@ -3606,9 +3856,9 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.36" +version = "0.1.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fce9567bd60a67d08a16488756721ba392f24f29006402881e43b19aac64307" +checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" dependencies = [ "cfg-if", "log", @@ -3618,9 +3868,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.29" +version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeea4303076558a00714b823f9ad67d58a3bbda1df83d8827d21193156e22f7" +checksum = "24eb03ba0eab1fd845050058ce5e616558e8f8d8fca633e6b163fe25c797213a" dependencies = [ "once_cell", ] @@ -3637,6 +3887,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +[[package]] +name = "ucd-trie" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e79c4d996edb816c91e4308506774452e55e95c3c9de07b6729e17e15a5ef81" + [[package]] name = "unicase" version = "2.6.0" @@ -3654,24 +3910,24 @@ checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" [[package]] name = "unicode-ident" -version = "1.0.3" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" +checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" [[package]] name = "unicode-normalization" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" dependencies = [ "tinyvec", ] [[package]] name = "unicode-segmentation" -version = "1.9.0" +version = "1.10.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" +checksum = "0fdbf052a0783de01e944a6ce7a8cb939e295b1e7be835a1112c3b9a7f047a5a" [[package]] name = "unicode-xid" @@ -3681,9 +3937,9 @@ checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" [[package]] name = "unicode-xid" -version = "0.2.3" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" [[package]] name = "untrusted" @@ -3716,9 +3972,18 @@ checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" [[package]] name = "uuid" -version = "1.1.2" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd6469f4314d5f1ffec476e05f17cc9a78bc7a27a6a857842170bdf8d6f98d2f" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "uuid" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "feb41e78f93363bb2df8b0e86a2ca30eed7806ea16ea0c790d757cf93f79be83" dependencies = [ "getrandom", "serde", @@ -3744,12 +4009,12 @@ checksum = "73ba753d713ec3844652ad2cb7eb56bc71e34213a14faddac7852a10ba88f61e" dependencies = [ "anyhow", "cfg-if", - "enum-iterator 1.1.3", + "enum-iterator", "getset", "git2", "rustversion", "thiserror", - "time 0.3.14", + "time", ] [[package]] @@ -3803,12 +4068,6 @@ dependencies = [ "try-lock", ] -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -3817,9 +4076,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.78" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" +checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -3827,24 +4086,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.78" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" +checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" dependencies = [ "bumpalo", - "lazy_static", "log", - "proc-macro2 1.0.43", + "once_cell", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.28" +version = "0.4.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e8d7523cb1f2a4c96c1317ca690031b714a51cc14e05f712446691f413f5d39" +checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" dependencies = [ "cfg-if", "js-sys", @@ -3854,9 +4113,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.78" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" +checksum = 
"052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" dependencies = [ "quote 1.0.21", "wasm-bindgen-macro-support", @@ -3864,28 +4123,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.78" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" +checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" dependencies = [ - "proc-macro2 1.0.43", + "proc-macro2 1.0.47", "quote 1.0.21", - "syn 1.0.99", + "syn 1.0.103", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.78" +version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" +checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" [[package]] name = "web-sys" -version = "0.3.55" +version = "0.3.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" +checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" dependencies = [ "js-sys", "wasm-bindgen", @@ -3903,9 +4162,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.22.4" +version = "0.22.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1c760f0d366a6c24a02ed7816e23e691f5d92291f94d15e836006fd11b04daf" +checksum = "368bfe657969fb01238bb756d351dcade285e0f6fcbd36dcb23359a5169975be" dependencies = [ "webpki", ] @@ -3919,17 +4178,6 @@ dependencies = [ "hashbrown 0.7.2", ] -[[package]] -name = "whoami" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd9fa466115c4a3fadac4119e0f6182f0ef5c5356cddb0b0b5de09b87369f15" -dependencies = [ - "bumpalo", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "winapi" version = "0.3.9" @@ -3967,43 +4215,100 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", + "windows_aarch64_msvc 0.36.1", + "windows_i686_gnu 0.36.1", + "windows_i686_msvc 0.36.1", + "windows_x86_64_gnu 0.36.1", + "windows_x86_64_msvc 0.36.1", ] +[[package]] +name = "windows-sys" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a3e1820f08b8513f676f7ab6c1f99ff312fb97b553d30ff4dd86f9f15728aa7" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.42.0", + "windows_i686_gnu 0.42.0", + "windows_i686_msvc 0.42.0", + "windows_x86_64_gnu 0.42.0", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.42.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d2aa71f6f0cbe00ae5167d90ef3cfe66527d6f613ca78ac8024c3ccab9a19e" + [[package]] name = "windows_aarch64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dd0f252f5a35cac83d6311b2e795981f5ee6e67eb1f9a7f64eb4500fbc4dcdb4" + [[package]] name = "windows_i686_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" +[[package]] +name = "windows_i686_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbeae19f6716841636c28d695375df17562ca208b2b7d0dc47635a50ae6c5de7" + [[package]] name = "windows_i686_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" +[[package]] +name = "windows_i686_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84c12f65daa39dd2babe6e442988fc329d6243fdce47d7d2d155b8d874862246" + [[package]] name = "windows_x86_64_gnu" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" +[[package]] +name = "windows_x86_64_gnu" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf7b1b21b5362cbc318f686150e5bcea75ecedc74dd157d874d754a2ca44b0ed" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09d525d2ba30eeb3297665bd434a54297e4170c7f1a44cad4ef58095b4cd2028" + [[package]] name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40009d85759725a34da6d89a94e63d7bdc50a862acf0dbc7c8e488f1edcb6f5" + [[package]] name = "winreg" version = "0.10.1" @@ -4028,6 +4333,15 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d12cb7a57bbf2ab670ed9545bae3648048547f9039279a89ce000208e585c1" +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "yaup" version = "0.2.1" @@ -4054,21 +4368,56 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" dependencies = [ - "proc-macro2 1.0.43", - "syn 1.0.99", + "proc-macro2 1.0.47", + "syn 1.0.103", "synstructure", ] [[package]] name = "zip" -version = "0.5.13" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93ab48844d61251bb3835145c521d88aa4031d7139e8485990f60ca911fa0815" +checksum = "537ce7411d25e54e8ae21a7ce0b15840e7bfcff15b51d697ec3266cc76bdf080" dependencies = [ + "aes", "byteorder", "bzip2", + "constant_time_eq", "crc32fast", + "crossbeam-utils", "flate2", - "thiserror", - "time 0.1.44", + "hmac", + "pbkdf2", + "sha1", + "time", + "zstd", +] + +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.1+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" +dependencies = [ + "cc", + "libc", ] diff --git a/Cargo.toml b/Cargo.toml index 678d1b78b..2b756f87c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,8 +3,11 @@ resolver = "2" members = [ "meilisearch-http", "meilisearch-types", - "meilisearch-lib", "meilisearch-auth", + "meili-snap", + "index-scheduler", + "dump", + "file-store", "permissive-json-pointer", ] diff --git a/README.md b/README.md index f728d8a6b..2bbc3dfe1 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,14 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f 🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥 +## 🎃 Hacktoberfest + +It’s Hacktoberfest 2022 @Meilisearch + +[Hacktoberfest](https://hacktoberfest.com/) is a celebration of the open-source community. This year, and for the third time in a row, Meilisearch is participating in this fantastic event. + +You’d like to contribute? Don’t hesitate to check out our [contributing guidelines](./CONTRIBUTING.md). + ## ✨ Features - **Search-as-you-type:** find search results in less than 50 milliseconds diff --git a/config.toml b/config.toml new file mode 100644 index 000000000..8933dd22f --- /dev/null +++ b/config.toml @@ -0,0 +1,135 @@ +# This file shows the default configuration of Meilisearch. +# All variables are defined here: https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables + +db_path = "./data.ms" +# Designates the location where database files will be created and retrieved. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#database-path + +env = "development" +# Configures the instance's environment. Value must be either `production` or `development`. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#environment + +http_addr = "localhost:7700" +# The address on which the HTTP server will listen. + +# master_key = "YOUR_MASTER_KEY_VALUE" +# Sets the instance's master key, automatically protecting all routes except GET /health. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#master-key + +# no_analytics = true +# Deactivates Meilisearch's built-in telemetry when provided. +# Meilisearch automatically collects data from all instances that do not opt out using this flag. +# All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-analytics + +http_payload_size_limit = "100 MB" +# Sets the maximum size of accepted payloads. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#payload-limit-size + +log_level = "INFO" +# Defines how much detail should be present in Meilisearch's logs. +# Meilisearch currently supports five log levels, listed in order of increasing verbosity: `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE` +# https://docs.meilisearch.com/learn/configuration/instance_options.html#log-level + +max_index_size = "100 GiB" +# Sets the maximum size of the index. 
+# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-index-size + +max_task_db_size = "100 GiB" +# Sets the maximum size of the task database. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-task-db-size + +# max_indexing_memory = "2 GiB" +# Sets the maximum amount of RAM Meilisearch can use when indexing. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-memory + +# max_indexing_threads = 4 +# Sets the maximum number of threads Meilisearch can use during indexing. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-threads + +disable_auto_batching = false +# Deactivates auto-batching when provided. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-auto-batching + + +############# +### DUMPS ### +############# + +dumps_dir = "dumps/" +# Sets the directory where Meilisearch will create dump files. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#dumps-destination + +# import_dump = "./path/to/my/file.dump" +# Imports the dump file located at the specified path. Path must point to a .dump file. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-dump + +ignore_missing_dump = false +# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-dump + +ignore_dump_if_db_exists = false +# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-dump-if-db-exists + + +################# +### SNAPSHOTS ### +################# + +schedule_snapshot = false +# Activates scheduled snapshots when provided. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#schedule-snapshot-creation + +snapshot_dir = "snapshots/" +# Sets the directory where Meilisearch will store snapshots. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-destination + +snapshot_interval_sec = 86400 +# Defines the interval between each snapshot. Value must be given in seconds. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-interval + +# import_snapshot = "./path/to/my/snapshot" +# Launches Meilisearch after importing a previously-generated snapshot at the given filepath. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-snapshot + +ignore_missing_snapshot = false +# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-snapshot + +ignore_snapshot_if_db_exists = false +# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-snapshot-if-db-exists + + +########### +### SSL ### +########### + +# ssl_auth_path = "./path/to/root" +# Enables client authentication in the specified path. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-authentication-path + +# ssl_cert_path = "./path/to/certfile" +# Sets the server's SSL certificates. 
+# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-certificates-path + +# ssl_key_path = "./path/to/private-key" +# Sets the server's SSL key files. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-key-path + +# ssl_ocsp_path = "./path/to/ocsp-file" +# Sets the server's OCSP file. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-ocsp-path + +ssl_require_auth = false +# Makes SSL authentication mandatory. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-require-auth + +ssl_resumption = false +# Activates SSL session resumption. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-resumption + +ssl_tickets = false +# Activates SSL tickets. +# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-tickets diff --git a/download-latest.sh b/download-latest.sh index d1cfdd127..42863d587 100644 --- a/download-latest.sh +++ b/download-latest.sh @@ -1,29 +1,38 @@ #!/bin/sh -# COLORS +# GLOBALS + +# Colors RED='\033[31m' GREEN='\033[32m' DEFAULT='\033[0m' -# GLOBALS -GREP_SEMVER_REGEXP='v\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)$' # i.e. v[number].[number].[number] +# Project name +PNAME='meilisearch' + +# Version regexp i.e. v[number].[number].[number] +GREP_SEMVER_REGEXP='v\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)$' + +# GitHub API address +GITHUB_API='https://api.github.com/repos/meilisearch/meilisearch/releases' +# GitHub Release address +GITHUB_REL='https://github.com/meilisearch/meilisearch/releases/download/' # FUNCTIONS -# semverParseInto and semverLT from https://github.com/cloudflare/semver_bash/blob/master/semver.sh - +# semverParseInto and semverLT from: https://github.com/cloudflare/semver_bash/blob/master/semver.sh # usage: semverParseInto version major minor patch special # version: the string version # major, minor, patch, special: will be assigned by the function semverParseInto() { local RE='[^0-9]*\([0-9]*\)[.]\([0-9]*\)[.]\([0-9]*\)\([0-9A-Za-z-]*\)' - #MAJOR + # MAJOR eval $2=`echo $1 | sed -e "s#$RE#\1#"` - #MINOR + # MINOR eval $3=`echo $1 | sed -e "s#$RE#\2#"` - #PATCH + # PATCH eval $4=`echo $1 | sed -e "s#$RE#\3#"` - #SPECIAL + # SPECIAL eval $5=`echo $1 | sed -e "s#$RE#\4#"` } @@ -67,16 +76,22 @@ semverLT() { return 1 } -# Get a token from https://github.com/settings/tokens to increase rate limit (from 60 to 5000), make sure the token scope is set to 'public_repo' -# Create GITHUB_PAT environment variable once you acquired the token to start using it -# Returns the tag of the latest stable release (in terms of semver and not of release date) +# Get a token from: https://github.com/settings/tokens to increase rate limit (from 60 to 5000), +# make sure the token scope is set to 'public_repo'. +# Create GITHUB_PAT environment variable once you acquired the token to start using it. +# Returns the tag of the latest stable release (in terms of semver and not of release date). get_latest() { - temp_file='temp_file' # temp_file needed because the grep would start before the download is over + # temp_file is needed because the grep would start before the download is over + temp_file=$(mktemp -q /tmp/$PNAME.XXXXXXXXX) + if [ $? -ne 0 ]; then + echo "$0: Can't create temp file, bye bye.." 
+ exit 1 + fi if [ -z "$GITHUB_PAT" ]; then - curl -s 'https://api.github.com/repos/meilisearch/meilisearch/releases' > "$temp_file" || return 1 + curl -s $GITHUB_API > "$temp_file" || return 1 else - curl -H "Authorization: token $GITHUB_PAT" -s 'https://api.github.com/repos/meilisearch/meilisearch/releases' > "$temp_file" || return 1 + curl -H "Authorization: token $GITHUB_PAT" -s $GITHUB_API > "$temp_file" || return 1 fi releases=$(cat "$temp_file" | \ @@ -89,28 +104,35 @@ get_latest() { latest='' current_tag='' for release_info in $releases; do - if [ $i -eq 0 ]; then # Checking tag_name - if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then # If it's not an alpha or beta release + # Checking tag_name + if [ $i -eq 0 ]; then + # If it's not an alpha or beta release + if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then current_tag=$release_info else current_tag='' fi i=1 - elif [ $i -eq 1 ]; then # Checking draft boolean + # Checking draft boolean + elif [ $i -eq 1 ]; then if [ "$release_info" = 'true' ]; then current_tag='' fi i=2 - elif [ $i -eq 2 ]; then # Checking prerelease boolean + # Checking prerelease boolean + elif [ $i -eq 2 ]; then if [ "$release_info" = 'true' ]; then current_tag='' fi i=0 - if [ "$current_tag" != '' ]; then # If the current_tag is valid - if [ "$latest" = '' ]; then # If there is no latest yet + # If the current_tag is valid + if [ "$current_tag" != '' ]; then + # If there is no latest yet + if [ "$latest" = '' ]; then latest="$current_tag" else - semverLT $current_tag $latest # Comparing latest and the current tag + # Comparing latest and the current tag + semverLT $current_tag $latest if [ $? -eq 1 ]; then latest="$current_tag" fi @@ -123,7 +145,7 @@ get_latest() { return 0 }
-# Gets the OS by setting the $os variable +# Gets the OS by setting the $os variable. # Returns 0 in case of success, 1 otherwise. get_os() { os_name=$(uname -s) @@ -134,7 +156,7 @@ get_os() { 'Linux') os='linux' ;; - 'MINGW'*) + 'MINGW'*) os='windows' ;; *) @@ -143,7 +165,7 @@ get_os() { return 0 }
-# Gets the architecture by setting the $archi variable +# Gets the architecture by setting the $archi variable. # Returns 0 in case of success, 1 otherwise. get_archi() { architecture=$(uname -m) @@ -152,7 +174,8 @@ get_archi() { archi='amd64' ;; 'arm64') - if [ $os = 'macos' ]; then # MacOS M1 + # MacOS M1 + if [ $os = 'macos' ]; then archi='amd64' else archi='aarch64' @@ -171,9 +194,9 @@ success_usage() { printf "$GREEN%s\n$DEFAULT" "Meilisearch $latest binary successfully downloaded as '$binary_name' file." echo '' echo 'Run it:' - echo ' $ ./meilisearch' + echo " $ ./$PNAME" echo 'Usage:' - echo ' $ ./meilisearch --help' + echo " $ ./$PNAME --help" }
not_available_failure_usage() { @@ -189,52 +212,55 @@ fetch_release_failure_usage() { echo 'Please let us know about this issue: https://github.com/meilisearch/meilisearch/issues/new/choose' }
+fill_release_variables() { + # Fill $latest variable. + if ! get_latest; then + # TO CHANGE. + fetch_release_failure_usage + exit 1 + fi + if [ "$latest" = '' ]; then + fetch_release_failure_usage + exit 1 + fi + # Fill $os variable. + if ! get_os; then + not_available_failure_usage + exit 1 + fi + # Fill $archi variable. + if ! get_archi; then + not_available_failure_usage + exit 1 + fi +} + +download_binary() { + fill_release_variables + echo "Downloading Meilisearch binary $latest for $os, architecture $archi..."
+ case "$os" in + 'windows') + release_file="$PNAME-$os-$archi.exe" + binary_name="$PNAME.exe" + ;; + *) + release_file="$PNAME-$os-$archi" + binary_name="$PNAME" + esac + # Fetch the Meilisearch binary. + curl --fail -OL "$GITHUB_REL/$latest/$release_file" + if [ $? -ne 0 ]; then + fetch_release_failure_usage + exit 1 + fi + mv "$release_file" "$binary_name" + chmod 744 "$binary_name" + success_usage +} + # MAIN -# Fill $latest variable -if ! get_latest; then - fetch_release_failure_usage # TO CHANGE - exit 1 -fi - -if [ "$latest" = '' ]; then - fetch_release_failure_usage - exit 1 -fi - -# Fill $os variable -if ! get_os; then - not_available_failure_usage - exit 1 -fi - -# Fill $archi variable -if ! get_archi; then - not_available_failure_usage - exit 1 -fi - -echo "Downloading Meilisearch binary $latest for $os, architecture $archi..." -case "$os" in - 'windows') - release_file="meilisearch-$os-$archi.exe" - binary_name='meilisearch.exe' - - ;; - *) - release_file="meilisearch-$os-$archi" - binary_name='meilisearch' - -esac - -# Fetch the Meilisearch binary -link="https://github.com/meilisearch/meilisearch/releases/download/$latest/$release_file" -curl --fail -OL "$link" -if [ $? -ne 0 ]; then - fetch_release_failure_usage - exit 1 -fi - -mv "$release_file" "$binary_name" -chmod 744 "$binary_name" -success_usage +main() { + download_binary +} +main diff --git a/dump/Cargo.toml b/dump/Cargo.toml new file mode 100644 index 000000000..c5dc10949 --- /dev/null +++ b/dump/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "dump" +version = "0.30.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.65" +flate2 = "1.0.22" +http = "0.2.8" +log = "0.4.17" +meilisearch-auth = { path = "../meilisearch-auth" } +meilisearch-types = { path = "../meilisearch-types" } +once_cell = "1.15.0" +regex = "1.6.0" +roaring = { version = "0.10.0", features = ["serde"] } +serde = { version = "1.0.136", features = ["derive"] } +serde_json = { version = "1.0.85", features = ["preserve_order"] } +tar = "0.4.38" +tempfile = "3.3.0" +thiserror = "1.0.30" +time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } +uuid = { version = "1.1.2", features = ["serde", "v4"] } + +[dev-dependencies] +big_s = "1.0.2" +maplit = "1.0.2" +meili-snap = { path = "../meili-snap" } +meilisearch-types = { path = "../meilisearch-types" } diff --git a/dump/README.md b/dump/README.md new file mode 100644 index 000000000..3537f188e --- /dev/null +++ b/dump/README.md @@ -0,0 +1,17 @@ +``` +dump +├── indexes +│ ├── cattos +│ │ ├── documents.jsonl +│ │ └── settings.json +│ └── doggos +│ ├── documents.jsonl +│ └── settings.json +├── instance-uid.uuid +├── keys.jsonl +├── metadata.json +└── tasks + ├── update_files + │ └── [task_id].jsonl + └── queue.jsonl +``` \ No newline at end of file diff --git a/dump/src/error.rs b/dump/src/error.rs new file mode 100644 index 000000000..a11aae9cf --- /dev/null +++ b/dump/src/error.rs @@ -0,0 +1,36 @@ +use meilisearch_types::error::{Code, ErrorCode}; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum Error { + #[error("The version 1 of the dumps is not supported anymore. 
You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.")] + DumpV1Unsupported, + #[error("Bad index name.")] + BadIndexName, + #[error("Malformed task.")] + MalformedTask, + + #[error(transparent)] + Io(#[from] std::io::Error), + #[error(transparent)] + Serde(#[from] serde_json::Error), + #[error(transparent)] + Uuid(#[from] uuid::Error), +} + +impl ErrorCode for Error { + fn error_code(&self) -> Code { + match self { + // Are these three really Internal errors? + // TODO look at that later. + Error::Io(_) => Code::Internal, + Error::Serde(_) => Code::Internal, + Error::Uuid(_) => Code::Internal, + + // all these errors should never be raised when creating a dump, thus no error code should be associated. + Error::DumpV1Unsupported => Code::Internal, + Error::BadIndexName => Code::Internal, + Error::MalformedTask => Code::Internal, + } + } +}
diff --git a/dump/src/lib.rs b/dump/src/lib.rs new file mode 100644 index 000000000..423ad008c --- /dev/null +++ b/dump/src/lib.rs @@ -0,0 +1,464 @@ +#![allow(clippy::type_complexity)] +#![allow(clippy::wrong_self_convention)] + +use meilisearch_types::error::ResponseError; +use meilisearch_types::keys::Key; +use meilisearch_types::milli::update::IndexDocumentsMethod; +use meilisearch_types::settings::Unchecked; +use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId}; +use meilisearch_types::InstanceUid; +use roaring::RoaringBitmap; +use serde::{Deserialize, Serialize}; +use time::OffsetDateTime; + +mod error; +mod reader; +mod writer; + +pub use error::Error; +pub use reader::{DumpReader, UpdateFile}; +pub use writer::DumpWriter; + +const CURRENT_DUMP_VERSION: Version = Version::V6; + +type Result<T> = std::result::Result<T, Error>; + +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + pub dump_version: Version, + pub db_version: String, + #[serde(with = "time::serde::rfc3339")] + pub dump_date: OffsetDateTime, +}
+ +#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct IndexMetadata { + pub uid: String, + pub primary_key: Option<String>, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, +} + +#[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] +pub enum Version { + V1, + V2, + V3, + V4, + V5, + V6, +} + +#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TaskDump { + pub uid: TaskId, + #[serde(default)] + pub index_uid: Option<String>, + pub status: Status, + #[serde(rename = "type")] + pub kind: KindDump, + + #[serde(skip_serializing_if = "Option::is_none")] + pub canceled_by: Option<TaskId>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub details: Option<Details>, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option<ResponseError>, + + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, + #[serde( + with = "time::serde::rfc3339::option", + skip_serializing_if = "Option::is_none", + default + )] + pub started_at: Option<OffsetDateTime>, + #[serde( + with = "time::serde::rfc3339::option", + skip_serializing_if = "Option::is_none", + default + )] + pub finished_at: Option<OffsetDateTime>, +}
+ +// A `Kind` specific version made for the dump. If modified you may break the dump. +#[derive(Debug, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub enum KindDump { + DocumentImport { + primary_key: Option<String>, + method: IndexDocumentsMethod, + documents_count: u64, + allow_index_creation: bool, + }, + DocumentDeletion { + documents_ids: Vec<String>, + }, + DocumentClear, + Settings { + settings: Box<meilisearch_types::settings::Settings<Unchecked>>, + is_deletion: bool, + allow_index_creation: bool, + }, + IndexDeletion, + IndexCreation { + primary_key: Option<String>, + }, + IndexUpdate { + primary_key: Option<String>, + }, + IndexSwap { + swaps: Vec<IndexSwap>, + }, + TaskCancelation { + query: String, + tasks: RoaringBitmap, + }, + TasksDeletion { + query: String, + tasks: RoaringBitmap, + }, + DumpCreation { + keys: Vec<Key>, + instance_uid: Option<InstanceUid>, + }, + SnapshotCreation, +}
+ +impl From<Task> for TaskDump { + fn from(task: Task) -> Self { + TaskDump { + uid: task.uid, + index_uid: task.index_uid().map(|uid| uid.to_string()), + status: task.status, + kind: task.kind.into(), + canceled_by: task.canceled_by, + details: task.details, + error: task.error, + enqueued_at: task.enqueued_at, + started_at: task.started_at, + finished_at: task.finished_at, + } + } +}
+ +impl From<KindWithContent> for KindDump { + fn from(kind: KindWithContent) -> Self { + match kind { + KindWithContent::DocumentAdditionOrUpdate { + primary_key, + method, + documents_count, + allow_index_creation, + .. + } => KindDump::DocumentImport { + primary_key, + method, + documents_count, + allow_index_creation, + }, + KindWithContent::DocumentDeletion { documents_ids, .. } => { + KindDump::DocumentDeletion { documents_ids } + } + KindWithContent::DocumentClear { .. } => KindDump::DocumentClear, + KindWithContent::SettingsUpdate { + new_settings, + is_deletion, + allow_index_creation, + .. + } => KindDump::Settings { settings: new_settings, is_deletion, allow_index_creation }, + KindWithContent::IndexDeletion { .. } => KindDump::IndexDeletion, + KindWithContent::IndexCreation { primary_key, .. } => { + KindDump::IndexCreation { primary_key } + } + KindWithContent::IndexUpdate { primary_key, .. } => { + KindDump::IndexUpdate { primary_key } + } + KindWithContent::IndexSwap { swaps } => KindDump::IndexSwap { swaps }, + KindWithContent::TaskCancelation { query, tasks } => { + KindDump::TaskCancelation { query, tasks } + } + KindWithContent::TaskDeletion { query, tasks } => { + KindDump::TasksDeletion { query, tasks } + } + KindWithContent::DumpCreation { keys, instance_uid } => { + KindDump::DumpCreation { keys, instance_uid } + } + KindWithContent::SnapshotCreation => KindDump::SnapshotCreation, + } + } +}
+ +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::{Seek, SeekFrom}; + use std::str::FromStr; + + use big_s::S; + use maplit::btreeset; + use meilisearch_types::index_uid::IndexUid; + use meilisearch_types::keys::{Action, Key}; + use meilisearch_types::milli::update::Setting; + use meilisearch_types::milli::{self}; + use meilisearch_types::settings::{Checked, Settings}; + use meilisearch_types::star_or::StarOr; + use meilisearch_types::tasks::{Details, Status}; + use serde_json::{json, Map, Value}; + use time::macros::datetime; + use uuid::Uuid; + + use crate::reader::Document; + use crate::{DumpReader, DumpWriter, IndexMetadata, KindDump, TaskDump, Version}; + + pub fn create_test_instance_uid() -> Uuid { + Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap() + } + + pub fn create_test_index_metadata() -> IndexMetadata { + IndexMetadata { + uid: S("doggo"), + primary_key: None, + created_at: datetime!(2022-11-20 12:00 UTC), + updated_at: datetime!(2022-11-21 00:00 UTC), + } + }
+ + pub fn create_test_documents() -> Vec<Map<String, Value>> { + vec![ + json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 }) + .as_object() + .unwrap() + .clone(), + json!({ "id": 2, "race": "bernese mountain", "name": "tamo", "age": 6 }) + .as_object() + .unwrap() + .clone(), + json!({ "id": 3, "race": "great pyrenees", "name": "patou", "age": 5 }) + .as_object() + .unwrap() + .clone(), + ] + } + + pub fn create_test_settings() -> Settings<Checked> { + let settings = Settings { + displayed_attributes: Setting::Set(vec![S("race"), S("name")]), + searchable_attributes: Setting::Set(vec![S("name"), S("race")]), + filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }), + sortable_attributes: Setting::Set(btreeset! { S("age") }), + ranking_rules: Setting::NotSet, + stop_words: Setting::NotSet, + synonyms: Setting::NotSet, + distinct_attribute: Setting::NotSet, + typo_tolerance: Setting::NotSet, + faceting: Setting::NotSet, + pagination: Setting::NotSet, + _kind: std::marker::PhantomData, + }; + settings.check() + }
+ + pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> { + vec![ + ( + TaskDump { + uid: 0, + index_uid: Some(S("doggo")), + status: Status::Succeeded, + kind: KindDump::DocumentImport { + method: milli::update::IndexDocumentsMethod::UpdateDocuments, + allow_index_creation: true, + primary_key: Some(S("bone")), + documents_count: 12, + }, + canceled_by: None, + details: Some(Details::DocumentAdditionOrUpdate { + received_documents: 12, + indexed_documents: Some(10), + }), + error: None, + enqueued_at: datetime!(2022-11-11 0:00 UTC), + started_at: Some(datetime!(2022-11-20 0:00 UTC)), + finished_at: Some(datetime!(2022-11-21 0:00 UTC)), + }, + None, + ), + ( + TaskDump { + uid: 1, + index_uid: Some(S("doggo")), + status: Status::Enqueued, + kind: KindDump::DocumentImport { + method: milli::update::IndexDocumentsMethod::UpdateDocuments, + allow_index_creation: true, + primary_key: None, + documents_count: 2, + }, + canceled_by: None, + details: Some(Details::DocumentAdditionOrUpdate { + received_documents: 2, + indexed_documents: None, + }), + error: None, + enqueued_at: datetime!(2022-11-11 0:00 UTC), + started_at: None, + finished_at: None, + }, + Some(vec![ + json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(), + json!({ "id": 5, "race": "patou" }).as_object().unwrap().clone(), + ]), + ), + ( + TaskDump { + uid: 5, + index_uid: Some(S("catto")), + status: Status::Enqueued, + kind: KindDump::IndexDeletion, + canceled_by: None, + details: None, + error: None, + enqueued_at: datetime!(2022-11-15 0:00 UTC), + started_at: None, + finished_at: None, + }, + None, + ), + ] + }
+ + pub fn create_test_api_keys() -> Vec<Key> { + vec![ + Key { + description: Some(S("The main key to manage all the doggos")), + name: Some(S("doggos_key")), + uid: Uuid::from_str("9f8a34da-b6b2-42f0-939b-dbd4c3448655").unwrap(), + actions: vec![Action::DocumentsAll], + indexes: vec![StarOr::Other(IndexUid::from_str("doggos").unwrap())], + expires_at: Some(datetime!(4130-03-14 12:21 UTC)), + created_at: datetime!(1960-11-15 0:00 UTC), + updated_at: datetime!(2022-11-10 0:00 UTC), + }, + Key { + description: Some(S("The master key for everything and even the doggos")), + name: Some(S("master_key")), + uid: Uuid::from_str("4622f717-1c00-47bb-a494-39d76a49b591").unwrap(), + actions: vec![Action::All], + indexes: vec![StarOr::Star], + expires_at: None, + created_at: datetime!(0000-01-01 00:01 UTC), + updated_at: datetime!(1964-05-04 17:25 UTC), + }, + Key { + description: Some(S("The useless key to for nothing nor the doggos")), + name: Some(S("useless_key")), + uid: Uuid::from_str("fb80b58b-0a34-412f-8ba7-1ce868f8ac5c").unwrap(), + actions: vec![], + indexes: vec![], + expires_at: None, + created_at: datetime!(400-02-29 0:00 UTC), + updated_at: datetime!(1024-02-29 0:00 UTC), + }, + ] + }
+ + pub fn create_test_dump() -> File { + let instance_uid = create_test_instance_uid(); + let dump = DumpWriter::new(Some(instance_uid)).unwrap(); + + // ========== Adding an index + let documents = create_test_documents(); + let settings = create_test_settings(); + + let mut index = dump.create_index("doggos", &create_test_index_metadata()).unwrap(); + for document in &documents { + index.push_document(document).unwrap(); +
} + index.flush().unwrap(); + index.settings(&settings).unwrap(); + + // ========== pushing the task queue + let tasks = create_test_tasks(); + + let mut task_queue = dump.create_tasks_queue().unwrap(); + for (task, update_file) in &tasks { + let mut update = task_queue.push_task(task).unwrap(); + if let Some(update_file) = update_file { + for u in update_file { + update.push_document(u).unwrap(); + } + } + } + task_queue.flush().unwrap(); + + // ========== pushing the api keys + let api_keys = create_test_api_keys(); + + let mut keys = dump.create_keys().unwrap(); + for key in &api_keys { + keys.push_key(key).unwrap(); + } + keys.flush().unwrap(); + + // create the dump + let mut file = tempfile::tempfile().unwrap(); + dump.persist_to(&mut file).unwrap(); + file.seek(SeekFrom::Start(0)).unwrap(); + + file + } + + #[test] + #[ignore] + fn test_creating_and_read_dump() { + let mut file = create_test_dump(); + let mut dump = DumpReader::open(&mut file).unwrap(); + + // ==== checking the top level infos + assert_eq!(dump.version(), Version::V6); + assert!(dump.date().is_some()); + assert_eq!(dump.instance_uid().unwrap().unwrap(), create_test_instance_uid()); + + // ==== checking the index + let mut indexes = dump.indexes().unwrap(); + let mut index = indexes.next().unwrap().unwrap(); + assert!(indexes.next().is_none()); // there was only one index in the dump + + for (document, expected) in index.documents().unwrap().zip(create_test_documents()) { + assert_eq!(document.unwrap(), expected); + } + assert_eq!(index.settings().unwrap(), create_test_settings()); + assert_eq!(index.metadata(), &create_test_index_metadata()); + + drop(index); + drop(indexes); + + // ==== checking the task queue + for (task, expected) in dump.tasks().unwrap().zip(create_test_tasks()) { + let (task, content_file) = task.unwrap(); + assert_eq!(task, expected.0); + + if let Some(expected_update) = expected.1 { + assert!( + content_file.is_some(), + "A content file was expected for the task {}.", + expected.0.uid + ); + let updates = content_file.unwrap().collect::, _>>().unwrap(); + assert_eq!(updates, expected_update); + } + } + + // ==== checking the keys + for (key, expected) in dump.keys().unwrap().zip(create_test_api_keys()) { + assert_eq!(key.unwrap(), expected); + } + } +} diff --git a/dump/src/reader/compat/mod.rs b/dump/src/reader/compat/mod.rs new file mode 100644 index 000000000..29836aa61 --- /dev/null +++ b/dump/src/reader/compat/mod.rs @@ -0,0 +1,4 @@ +pub mod v2_to_v3; +pub mod v3_to_v4; +pub mod v4_to_v5; +pub mod v5_to_v6; diff --git a/dump/src/reader/compat/v2_to_v3.rs b/dump/src/reader/compat/v2_to_v3.rs new file mode 100644 index 000000000..709670265 --- /dev/null +++ b/dump/src/reader/compat/v2_to_v3.rs @@ -0,0 +1,480 @@ +use std::convert::TryInto; +use std::str::FromStr; + +use time::OffsetDateTime; +use uuid::Uuid; + +use super::v3_to_v4::CompatV3ToV4; +use crate::reader::{v2, v3, Document}; +use crate::Result; + +pub struct CompatV2ToV3 { + pub from: v2::V2Reader, +} + +impl CompatV2ToV3 { + pub fn new(v2: v2::V2Reader) -> CompatV2ToV3 { + CompatV2ToV3 { from: v2 } + } + + pub fn index_uuid(&self) -> Vec { + self.from + .index_uuid() + .into_iter() + .map(|index| v3::meta::IndexUuid { uid: index.uid, uuid: index.uuid }) + .collect() + } + + pub fn to_v4(self) -> CompatV3ToV4 { + CompatV3ToV4::Compat(self) + } + + pub fn version(&self) -> crate::Version { + self.from.version() + } + + pub fn date(&self) -> Option { + self.from.date() + } + + pub fn instance_uid(&self) -> Result> { + Ok(None) + } + 
+ pub fn indexes(&self) -> Result> + '_> { + Ok(self.from.indexes()?.map(|index_reader| -> Result<_> { + let compat = CompatIndexV2ToV3::new(index_reader?); + Ok(compat) + })) + } + + pub fn tasks( + &mut self, + ) -> Box< + dyn Iterator>>>)>> + + '_, + > { + let _indexes = self.from.index_uuid.clone(); + + Box::new( + self.from + .tasks() + .map(move |task| { + task.map(|(task, content_file)| { + let task = v3::Task { uuid: task.uuid, update: task.update.into() }; + + Some(( + task, + content_file.map(|content_file| { + Box::new(content_file) as Box>> + }), + )) + }) + }) + .filter_map(|res| res.transpose()), + ) + } +} + +pub struct CompatIndexV2ToV3 { + from: v2::V2IndexReader, +} + +impl CompatIndexV2ToV3 { + pub fn new(v2: v2::V2IndexReader) -> CompatIndexV2ToV3 { + CompatIndexV2ToV3 { from: v2 } + } + + pub fn metadata(&self) -> &crate::IndexMetadata { + self.from.metadata() + } + + pub fn documents(&mut self) -> Result> + '_>> { + self.from + .documents() + .map(|iter| Box::new(iter) as Box> + '_>) + } + + pub fn settings(&mut self) -> Result> { + Ok(v3::Settings::::from(self.from.settings()?).check()) + } +} + +impl From for v3::updates::UpdateStatus { + fn from(update: v2::updates::UpdateStatus) -> Self { + match update { + v2::updates::UpdateStatus::Processing(processing) => { + match (processing.from.meta.clone(), processing.from.content).try_into() { + Ok(meta) => v3::updates::UpdateStatus::Processing(v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: processing.from.update_id, + meta, + enqueued_at: processing.from.enqueued_at, + }, + started_processing_at: processing.started_processing_at, + }), + Err(e) => { + log::warn!("Error with task {}: {}", processing.from.update_id, e); + log::warn!("Task will be marked as `Failed`."); + v3::updates::UpdateStatus::Failed(v3::updates::Failed { + from: v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: processing.from.update_id, + meta: update_from_unchecked_update_meta(processing.from.meta), + enqueued_at: processing.from.enqueued_at, + }, + started_processing_at: processing.started_processing_at, + }, + msg: e.to_string(), + code: v3::Code::MalformedDump, + failed_at: OffsetDateTime::now_utc(), + }) + } + } + } + v2::updates::UpdateStatus::Enqueued(enqueued) => { + match (enqueued.meta.clone(), enqueued.content).try_into() { + Ok(meta) => v3::updates::UpdateStatus::Enqueued(v3::updates::Enqueued { + update_id: enqueued.update_id, + meta, + enqueued_at: enqueued.enqueued_at, + }), + Err(e) => { + log::warn!("Error with task {}: {}", enqueued.update_id, e); + log::warn!("Task will be marked as `Failed`."); + v3::updates::UpdateStatus::Failed(v3::updates::Failed { + from: v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: enqueued.update_id, + meta: update_from_unchecked_update_meta(enqueued.meta), + enqueued_at: enqueued.enqueued_at, + }, + started_processing_at: OffsetDateTime::now_utc(), + }, + msg: e.to_string(), + code: v3::Code::MalformedDump, + failed_at: OffsetDateTime::now_utc(), + }) + } + } + } + v2::updates::UpdateStatus::Processed(processed) => { + v3::updates::UpdateStatus::Processed(v3::updates::Processed { + success: processed.success.into(), + processed_at: processed.processed_at, + from: v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: processed.from.from.update_id, + // since we're never going to read the content_file again it's ok to generate a fake one. 
+ meta: update_from_unchecked_update_meta(processed.from.from.meta), + enqueued_at: processed.from.from.enqueued_at, + }, + started_processing_at: processed.from.started_processing_at, + }, + }) + } + v2::updates::UpdateStatus::Aborted(aborted) => { + v3::updates::UpdateStatus::Aborted(v3::updates::Aborted { + from: v3::updates::Enqueued { + update_id: aborted.from.update_id, + // since we're never going to read the content_file again it's ok to generate a fake one. + meta: update_from_unchecked_update_meta(aborted.from.meta), + enqueued_at: aborted.from.enqueued_at, + }, + aborted_at: aborted.aborted_at, + }) + } + v2::updates::UpdateStatus::Failed(failed) => { + v3::updates::UpdateStatus::Failed(v3::updates::Failed { + from: v3::updates::Processing { + from: v3::updates::Enqueued { + update_id: failed.from.from.update_id, + // since we're never going to read the content_file again it's ok to generate a fake one. + meta: update_from_unchecked_update_meta(failed.from.from.meta), + enqueued_at: failed.from.from.enqueued_at, + }, + started_processing_at: failed.from.started_processing_at, + }, + msg: failed.error.message, + code: failed.error.error_code.into(), + failed_at: failed.failed_at, + }) + } + } + } +} + +impl TryFrom<(v2::updates::UpdateMeta, Option)> for v3::updates::Update { + type Error = crate::Error; + + fn try_from((update, uuid): (v2::updates::UpdateMeta, Option)) -> Result { + Ok(match update { + v2::updates::UpdateMeta::DocumentsAddition { method, format: _, primary_key } + if uuid.is_some() => + { + v3::updates::Update::DocumentAddition { + primary_key, + method: match method { + v2::updates::IndexDocumentsMethod::ReplaceDocuments => { + v3::updates::IndexDocumentsMethod::ReplaceDocuments + } + v2::updates::IndexDocumentsMethod::UpdateDocuments => { + v3::updates::IndexDocumentsMethod::UpdateDocuments + } + }, + content_uuid: uuid.unwrap(), + } + } + v2::updates::UpdateMeta::DocumentsAddition { .. } => { + return Err(crate::Error::MalformedTask) + } + v2::updates::UpdateMeta::ClearDocuments => v3::updates::Update::ClearDocuments, + v2::updates::UpdateMeta::DeleteDocuments { ids } => { + v3::updates::Update::DeleteDocuments(ids) + } + v2::updates::UpdateMeta::Settings(settings) => { + v3::updates::Update::Settings(settings.into()) + } + }) + } +} + +pub fn update_from_unchecked_update_meta(update: v2::updates::UpdateMeta) -> v3::updates::Update { + match update { + v2::updates::UpdateMeta::DocumentsAddition { method, format: _, primary_key } => { + v3::updates::Update::DocumentAddition { + primary_key, + method: match method { + v2::updates::IndexDocumentsMethod::ReplaceDocuments => { + v3::updates::IndexDocumentsMethod::ReplaceDocuments + } + v2::updates::IndexDocumentsMethod::UpdateDocuments => { + v3::updates::IndexDocumentsMethod::UpdateDocuments + } + }, + // we use this special uuid so we can recognize it if one day there is a bug related to this field. 
+ content_uuid: Uuid::from_str("00112233-4455-6677-8899-aabbccddeeff").unwrap(), + } + } + v2::updates::UpdateMeta::ClearDocuments => v3::updates::Update::ClearDocuments, + v2::updates::UpdateMeta::DeleteDocuments { ids } => { + v3::updates::Update::DeleteDocuments(ids) + } + v2::updates::UpdateMeta::Settings(settings) => { + v3::updates::Update::Settings(settings.into()) + } + } +} + +impl From for v3::updates::UpdateResult { + fn from(result: v2::updates::UpdateResult) -> Self { + match result { + v2::updates::UpdateResult::DocumentsAddition(addition) => { + v3::updates::UpdateResult::DocumentsAddition(v3::updates::DocumentAdditionResult { + nb_documents: addition.nb_documents, + }) + } + v2::updates::UpdateResult::DocumentDeletion { deleted } => { + v3::updates::UpdateResult::DocumentDeletion { deleted } + } + v2::updates::UpdateResult::Other => v3::updates::UpdateResult::Other, + } + } +} + +impl From for v3::Code { + fn from(code: String) -> Self { + match code.as_ref() { + "create_index" => v3::Code::CreateIndex, + "index_already_exists" => v3::Code::IndexAlreadyExists, + "index_not_found" => v3::Code::IndexNotFound, + "invalid_index_uid" => v3::Code::InvalidIndexUid, + "invalid_state" => v3::Code::InvalidState, + "missing_primary_key" => v3::Code::MissingPrimaryKey, + "primary_key_already_present" => v3::Code::PrimaryKeyAlreadyPresent, + "max_fields_limit_exceeded" => v3::Code::MaxFieldsLimitExceeded, + "missing_document_id" => v3::Code::MissingDocumentId, + "invalid_document_id" => v3::Code::InvalidDocumentId, + "filter" => v3::Code::Filter, + "sort" => v3::Code::Sort, + "bad_parameter" => v3::Code::BadParameter, + "bad_request" => v3::Code::BadRequest, + "database_size_limit_reached" => v3::Code::DatabaseSizeLimitReached, + "document_not_found" => v3::Code::DocumentNotFound, + "internal" => v3::Code::Internal, + "invalid_geo_field" => v3::Code::InvalidGeoField, + "invalid_ranking_rule" => v3::Code::InvalidRankingRule, + "invalid_store" => v3::Code::InvalidStore, + "invalid_token" => v3::Code::InvalidToken, + "missing_authorization_header" => v3::Code::MissingAuthorizationHeader, + "no_space_left_on_device" => v3::Code::NoSpaceLeftOnDevice, + "dump_not_found" => v3::Code::DumpNotFound, + "task_not_found" => v3::Code::TaskNotFound, + "payload_too_large" => v3::Code::PayloadTooLarge, + "retrieve_document" => v3::Code::RetrieveDocument, + "search_documents" => v3::Code::SearchDocuments, + "unsupported_media_type" => v3::Code::UnsupportedMediaType, + "dump_already_in_progress" => v3::Code::DumpAlreadyInProgress, + "dump_process_failed" => v3::Code::DumpProcessFailed, + "invalid_content_type" => v3::Code::InvalidContentType, + "missing_content_type" => v3::Code::MissingContentType, + "malformed_payload" => v3::Code::MalformedPayload, + "missing_payload" => v3::Code::MissingPayload, + other => { + log::warn!("Unknown error code {}", other); + v3::Code::UnretrievableErrorCode + } + } + } +} + +fn option_to_setting(opt: Option>) -> v3::Setting { + match opt { + Some(Some(t)) => v3::Setting::Set(t), + None => v3::Setting::NotSet, + Some(None) => v3::Setting::Reset, + } +} + +impl From> for v3::Settings { + fn from(settings: v2::Settings) -> Self { + v3::Settings { + displayed_attributes: option_to_setting(settings.displayed_attributes), + searchable_attributes: option_to_setting(settings.searchable_attributes), + filterable_attributes: option_to_setting(settings.filterable_attributes) + .map(|f| f.into_iter().collect()), + sortable_attributes: v3::Setting::NotSet, + ranking_rules: 
option_to_setting(settings.ranking_rules).map(|criteria| { + criteria.into_iter().map(|criterion| patch_ranking_rules(&criterion)).collect() + }), + stop_words: option_to_setting(settings.stop_words), + synonyms: option_to_setting(settings.synonyms), + distinct_attribute: option_to_setting(settings.distinct_attribute), + _kind: std::marker::PhantomData, + } + } +} + +fn patch_ranking_rules(ranking_rule: &str) -> String { + match v2::settings::Criterion::from_str(ranking_rule) { + Ok(v2::settings::Criterion::Words) => String::from("words"), + Ok(v2::settings::Criterion::Typo) => String::from("typo"), + Ok(v2::settings::Criterion::Proximity) => String::from("proximity"), + Ok(v2::settings::Criterion::Attribute) => String::from("attribute"), + Ok(v2::settings::Criterion::Exactness) => String::from("exactness"), + Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"), + Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"), + // we want to forward the error to the current version of meilisearch + Err(_) => ranking_rule.to_string(), + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn compat_v2_v3() { + let dump = File::open("tests/assets/v2.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = v2::V2Reader::open(dir).unwrap().to_v3(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"9507711db47c7171c79bc6d57d0bed79"); + assert_eq!(update_files.len(), 9); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"54b3d7a0d96de35427d867fa17164a99"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + 
"###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"ae7c5ade2243a553152dab2f354e9095"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"1be82b894556d23953af557b6a328a58"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"1be82b894556d23953af557b6a328a58"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/compat/v3_to_v4.rs b/dump/src/reader/compat/v3_to_v4.rs new file mode 100644 index 000000000..46fe0c9b2 --- /dev/null +++ b/dump/src/reader/compat/v3_to_v4.rs @@ -0,0 +1,450 @@ +use super::v2_to_v3::{CompatIndexV2ToV3, CompatV2ToV3}; +use super::v4_to_v5::CompatV4ToV5; +use crate::reader::{v3, v4, UpdateFile}; +use crate::Result; + +pub enum CompatV3ToV4 { + V3(v3::V3Reader), + Compat(CompatV2ToV3), +} + +impl CompatV3ToV4 { + pub fn new(v3: v3::V3Reader) -> CompatV3ToV4 { + CompatV3ToV4::V3(v3) + } + + pub fn to_v5(self) -> CompatV4ToV5 { + CompatV4ToV5::Compat(self) + } + + pub fn version(&self) -> crate::Version { + match self { + CompatV3ToV4::V3(v3) => v3.version(), + CompatV3ToV4::Compat(compat) => compat.version(), + } + } + + pub fn date(&self) -> Option { + match self { + CompatV3ToV4::V3(v3) => v3.date(), + CompatV3ToV4::Compat(compat) => compat.date(), + } + } + + pub fn instance_uid(&self) -> Result> { + Ok(None) + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(match self { + CompatV3ToV4::V3(v3) => { + Box::new(v3.indexes()?.map(|index| index.map(CompatIndexV3ToV4::from))) + as Box> + '_> + } + + CompatV3ToV4::Compat(compat) => { + Box::new(compat.indexes()?.map(|index| index.map(CompatIndexV3ToV4::from))) + as Box> + '_> + } + }) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + let indexes = match self { + CompatV3ToV4::V3(v3) => v3.index_uuid(), + CompatV3ToV4::Compat(compat) => compat.index_uuid(), + }; + let tasks = match self { + CompatV3ToV4::V3(v3) => v3.tasks(), + CompatV3ToV4::Compat(compat) => compat.tasks(), + }; + + Box::new( + tasks + // we need to override the old task ids that were generated + // by index in favor of a global unique incremental ID. 
+ .enumerate() + .map(move |(task_id, task)| { + task.map(|(task, content_file)| { + let index_uid = indexes + .iter() + .find(|index| index.uuid == task.uuid) + .map(|index| index.uid.clone()); + + let index_uid = match index_uid { + Some(uid) => uid, + None => { + log::warn!( + "Error while importing the update {}.", + task.update.id() + ); + log::warn!( + "The index associated to the uuid `{}` could not be retrieved.", + task.uuid.to_string() + ); + if task.update.is_finished() { + // we're fucking with his history but not his data, that's ok-ish. + log::warn!("The index-uuid will be set as `unknown`."); + String::from("unknown") + } else { + log::warn!("The task will be ignored."); + return None; + } + } + }; + + let task = v4::Task { + id: task_id as u32, + index_uid: v4::meta::IndexUid(index_uid), + content: match task.update.meta() { + v3::Kind::DeleteDocuments(documents) => { + v4::tasks::TaskContent::DocumentDeletion( + v4::tasks::DocumentDeletion::Ids(documents.clone()), + ) + } + v3::Kind::DocumentAddition { + primary_key, + method, + content_uuid, + } => v4::tasks::TaskContent::DocumentAddition { + merge_strategy: match method { + v3::updates::IndexDocumentsMethod::ReplaceDocuments => { + v4::tasks::IndexDocumentsMethod::ReplaceDocuments + } + v3::updates::IndexDocumentsMethod::UpdateDocuments => { + v4::tasks::IndexDocumentsMethod::UpdateDocuments + } + }, + primary_key: primary_key.clone(), + documents_count: 0, // we don't have this info + allow_index_creation: true, // there was no API-key in the v3 + content_uuid: *content_uuid, + }, + v3::Kind::Settings(settings) => { + v4::tasks::TaskContent::SettingsUpdate { + settings: v4::Settings::from(settings.clone()), + is_deletion: false, // that didn't exist at this time + allow_index_creation: true, // there was no API-key in the v3 + } + } + v3::Kind::ClearDocuments => { + v4::tasks::TaskContent::DocumentDeletion( + v4::tasks::DocumentDeletion::Clear, + ) + } + }, + events: match task.update { + v3::Status::Processing(processing) => { + vec![v4::tasks::TaskEvent::Created(processing.from.enqueued_at)] + } + v3::Status::Enqueued(enqueued) => { + vec![v4::tasks::TaskEvent::Created(enqueued.enqueued_at)] + } + v3::Status::Processed(processed) => { + vec![ + v4::tasks::TaskEvent::Created( + processed.from.from.enqueued_at, + ), + v4::tasks::TaskEvent::Processing( + processed.from.started_processing_at, + ), + v4::tasks::TaskEvent::Succeded { + result: match processed.success { + v3::updates::UpdateResult::DocumentsAddition( + document_addition, + ) => v4::tasks::TaskResult::DocumentAddition { + indexed_documents: document_addition + .nb_documents + as u64, + }, + v3::updates::UpdateResult::DocumentDeletion { + deleted, + } => v4::tasks::TaskResult::DocumentDeletion { + deleted_documents: deleted, + }, + v3::updates::UpdateResult::Other => { + v4::tasks::TaskResult::Other + } + }, + timestamp: processed.processed_at, + }, + ] + } + v3::Status::Failed(failed) => vec![ + v4::tasks::TaskEvent::Created(failed.from.from.enqueued_at), + v4::tasks::TaskEvent::Processing( + failed.from.started_processing_at, + ), + v4::tasks::TaskEvent::Failed { + error: v4::ResponseError::from_msg( + failed.msg.to_string(), + failed.code.into(), + ), + timestamp: failed.failed_at, + }, + ], + v3::Status::Aborted(aborted) => vec![ + v4::tasks::TaskEvent::Created(aborted.from.enqueued_at), + v4::tasks::TaskEvent::Failed { + error: v4::ResponseError::from_msg( + "Task was aborted in a previous version of meilisearch." 
+ .to_string(), + v4::errors::Code::UnretrievableErrorCode, + ), + timestamp: aborted.aborted_at, + }, + ], + }, + }; + + Some((task, content_file)) + }) + }) + .filter_map(|res| res.transpose()), + ) + } + + pub fn keys(&mut self) -> Box> + '_> { + Box::new(std::iter::empty()) + } +} + +pub enum CompatIndexV3ToV4 { + V3(v3::V3IndexReader), + Compat(CompatIndexV2ToV3), +} + +impl From for CompatIndexV3ToV4 { + fn from(index_reader: v3::V3IndexReader) -> Self { + Self::V3(index_reader) + } +} + +impl From for CompatIndexV3ToV4 { + fn from(index_reader: CompatIndexV2ToV3) -> Self { + Self::Compat(index_reader) + } +} + +impl CompatIndexV3ToV4 { + pub fn new(v3: v3::V3IndexReader) -> CompatIndexV3ToV4 { + CompatIndexV3ToV4::V3(v3) + } + + pub fn metadata(&self) -> &crate::IndexMetadata { + match self { + CompatIndexV3ToV4::V3(v3) => v3.metadata(), + CompatIndexV3ToV4::Compat(compat) => compat.metadata(), + } + } + + pub fn documents(&mut self) -> Result> + '_>> { + match self { + CompatIndexV3ToV4::V3(v3) => v3 + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + + CompatIndexV3ToV4::Compat(compat) => compat + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + } + } + + pub fn settings(&mut self) -> Result> { + Ok(match self { + CompatIndexV3ToV4::V3(v3) => { + v4::Settings::::from(v3.settings()?).check() + } + CompatIndexV3ToV4::Compat(compat) => { + v4::Settings::::from(compat.settings()?).check() + } + }) + } +} + +impl From> for v4::Setting { + fn from(setting: v3::Setting) -> Self { + match setting { + v3::Setting::Set(t) => v4::Setting::Set(t), + v3::Setting::Reset => v4::Setting::Reset, + v3::Setting::NotSet => v4::Setting::NotSet, + } + } +} + +impl From for v4::Code { + fn from(code: v3::Code) -> Self { + match code { + v3::Code::CreateIndex => v4::Code::CreateIndex, + v3::Code::IndexAlreadyExists => v4::Code::IndexAlreadyExists, + v3::Code::IndexNotFound => v4::Code::IndexNotFound, + v3::Code::InvalidIndexUid => v4::Code::InvalidIndexUid, + v3::Code::InvalidState => v4::Code::InvalidState, + v3::Code::MissingPrimaryKey => v4::Code::MissingPrimaryKey, + v3::Code::PrimaryKeyAlreadyPresent => v4::Code::PrimaryKeyAlreadyPresent, + v3::Code::MaxFieldsLimitExceeded => v4::Code::MaxFieldsLimitExceeded, + v3::Code::MissingDocumentId => v4::Code::MissingDocumentId, + v3::Code::InvalidDocumentId => v4::Code::InvalidDocumentId, + v3::Code::Filter => v4::Code::Filter, + v3::Code::Sort => v4::Code::Sort, + v3::Code::BadParameter => v4::Code::BadParameter, + v3::Code::BadRequest => v4::Code::BadRequest, + v3::Code::DatabaseSizeLimitReached => v4::Code::DatabaseSizeLimitReached, + v3::Code::DocumentNotFound => v4::Code::DocumentNotFound, + v3::Code::Internal => v4::Code::Internal, + v3::Code::InvalidGeoField => v4::Code::InvalidGeoField, + v3::Code::InvalidRankingRule => v4::Code::InvalidRankingRule, + v3::Code::InvalidStore => v4::Code::InvalidStore, + v3::Code::InvalidToken => v4::Code::InvalidToken, + v3::Code::MissingAuthorizationHeader => v4::Code::MissingAuthorizationHeader, + v3::Code::NoSpaceLeftOnDevice => v4::Code::NoSpaceLeftOnDevice, + v3::Code::DumpNotFound => v4::Code::DumpNotFound, + v3::Code::TaskNotFound => v4::Code::TaskNotFound, + v3::Code::PayloadTooLarge => v4::Code::PayloadTooLarge, + v3::Code::RetrieveDocument => v4::Code::RetrieveDocument, + v3::Code::SearchDocuments => v4::Code::SearchDocuments, + v3::Code::UnsupportedMediaType => v4::Code::UnsupportedMediaType, + v3::Code::DumpAlreadyInProgress => v4::Code::DumpAlreadyInProgress, + 
v3::Code::DumpProcessFailed => v4::Code::DumpProcessFailed, + v3::Code::InvalidContentType => v4::Code::InvalidContentType, + v3::Code::MissingContentType => v4::Code::MissingContentType, + v3::Code::MalformedPayload => v4::Code::MalformedPayload, + v3::Code::MissingPayload => v4::Code::MissingPayload, + v3::Code::UnretrievableErrorCode => v4::Code::UnretrievableErrorCode, + v3::Code::MalformedDump => v4::Code::MalformedDump, + } + } +} + +impl From> for v4::Settings { + fn from(settings: v3::Settings) -> Self { + v4::Settings { + displayed_attributes: settings.displayed_attributes.into(), + searchable_attributes: settings.searchable_attributes.into(), + filterable_attributes: settings.filterable_attributes.into(), + sortable_attributes: settings.sortable_attributes.into(), + ranking_rules: settings.ranking_rules.into(), + stop_words: settings.stop_words.into(), + synonyms: settings.synonyms.into(), + distinct_attribute: settings.distinct_attribute.into(), + typo_tolerance: v4::Setting::NotSet, + _kind: std::marker::PhantomData, + } + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn compat_v3_v4() { + let dump = File::open("tests/assets/v3.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = v3::V3Reader::open(dir).unwrap().to_v4(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"79bc053583a1a7172bbaaafb1edaeb78"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // keys + let keys = dump.keys().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"d751713988987e9331980363e24189ce"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"d3402aff19b90acea9e9a07c466690aa"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + 
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"687aaab250f01b55d57bc69aa313b581"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"cd9fedbd7e3492831a94da62c90013ea"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"cd9fedbd7e3492831a94da62c90013ea"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/compat/v4_to_v5.rs b/dump/src/reader/compat/v4_to_v5.rs new file mode 100644 index 000000000..7f985186f --- /dev/null +++ b/dump/src/reader/compat/v4_to_v5.rs @@ -0,0 +1,468 @@ +use super::v3_to_v4::{CompatIndexV3ToV4, CompatV3ToV4}; +use super::v5_to_v6::CompatV5ToV6; +use crate::reader::{v4, v5, Document}; +use crate::Result; + +pub enum CompatV4ToV5 { + V4(v4::V4Reader), + Compat(CompatV3ToV4), +} + +impl CompatV4ToV5 { + pub fn new(v4: v4::V4Reader) -> CompatV4ToV5 { + CompatV4ToV5::V4(v4) + } + + pub fn to_v6(self) -> CompatV5ToV6 { + CompatV5ToV6::Compat(self) + } + + pub fn version(&self) -> crate::Version { + match self { + CompatV4ToV5::V4(v4) => v4.version(), + CompatV4ToV5::Compat(compat) => compat.version(), + } + } + + pub fn date(&self) -> Option { + match self { + CompatV4ToV5::V4(v4) => v4.date(), + CompatV4ToV5::Compat(compat) => compat.date(), + } + } + + pub fn instance_uid(&self) -> Result> { + match self { + CompatV4ToV5::V4(v4) => v4.instance_uid(), + CompatV4ToV5::Compat(compat) => compat.instance_uid(), + } + } + + pub fn indexes(&self) -> Result> + '_>> { + Ok(match self { + CompatV4ToV5::V4(v4) => { + Box::new(v4.indexes()?.map(|index| index.map(CompatIndexV4ToV5::from))) + as Box> + '_> + } + + CompatV4ToV5::Compat(compat) => { + Box::new(compat.indexes()?.map(|index| index.map(CompatIndexV4ToV5::from))) + as Box> + '_> + } + }) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> + { + let tasks = match self { + CompatV4ToV5::V4(v4) => v4.tasks(), + CompatV4ToV5::Compat(compat) => compat.tasks(), + }; + Box::new(tasks.map(|task| { + task.map(|(task, content_file)| { + let task = v5::Task { + id: task.id, + content: match task.content { + v4::tasks::TaskContent::DocumentAddition { + content_uuid, + merge_strategy, + primary_key, + documents_count, + allow_index_creation, + } => v5::tasks::TaskContent::DocumentAddition { + index_uid: 
v5::meta::IndexUid(task.index_uid.0), + content_uuid, + merge_strategy: match merge_strategy { + v4::tasks::IndexDocumentsMethod::ReplaceDocuments => { + v5::tasks::IndexDocumentsMethod::ReplaceDocuments + } + v4::tasks::IndexDocumentsMethod::UpdateDocuments => { + v5::tasks::IndexDocumentsMethod::UpdateDocuments + } + }, + primary_key, + documents_count, + allow_index_creation, + }, + v4::tasks::TaskContent::DocumentDeletion(deletion) => { + v5::tasks::TaskContent::DocumentDeletion { + index_uid: v5::meta::IndexUid(task.index_uid.0), + deletion: match deletion { + v4::tasks::DocumentDeletion::Clear => { + v5::tasks::DocumentDeletion::Clear + } + v4::tasks::DocumentDeletion::Ids(ids) => { + v5::tasks::DocumentDeletion::Ids(ids) + } + }, + } + } + v4::tasks::TaskContent::SettingsUpdate { + settings, + is_deletion, + allow_index_creation, + } => v5::tasks::TaskContent::SettingsUpdate { + index_uid: v5::meta::IndexUid(task.index_uid.0), + settings: settings.into(), + is_deletion, + allow_index_creation, + }, + v4::tasks::TaskContent::IndexDeletion => { + v5::tasks::TaskContent::IndexDeletion { + index_uid: v5::meta::IndexUid(task.index_uid.0), + } + } + v4::tasks::TaskContent::IndexCreation { primary_key } => { + v5::tasks::TaskContent::IndexCreation { + index_uid: v5::meta::IndexUid(task.index_uid.0), + primary_key, + } + } + v4::tasks::TaskContent::IndexUpdate { primary_key } => { + v5::tasks::TaskContent::IndexUpdate { + index_uid: v5::meta::IndexUid(task.index_uid.0), + primary_key, + } + } + }, + events: task + .events + .into_iter() + .map(|event| match event { + v4::tasks::TaskEvent::Created(date) => { + v5::tasks::TaskEvent::Created(date) + } + v4::tasks::TaskEvent::Batched { timestamp, batch_id } => { + v5::tasks::TaskEvent::Batched { timestamp, batch_id } + } + v4::tasks::TaskEvent::Processing(date) => { + v5::tasks::TaskEvent::Processing(date) + } + v4::tasks::TaskEvent::Succeded { result, timestamp } => { + v5::tasks::TaskEvent::Succeeded { + result: match result { + v4::tasks::TaskResult::DocumentAddition { + indexed_documents, + } => v5::tasks::TaskResult::DocumentAddition { + indexed_documents, + }, + v4::tasks::TaskResult::DocumentDeletion { + deleted_documents, + } => v5::tasks::TaskResult::DocumentDeletion { + deleted_documents, + }, + v4::tasks::TaskResult::ClearAll { deleted_documents } => { + v5::tasks::TaskResult::ClearAll { deleted_documents } + } + v4::tasks::TaskResult::Other => { + v5::tasks::TaskResult::Other + } + }, + timestamp, + } + } + v4::tasks::TaskEvent::Failed { error, timestamp } => { + v5::tasks::TaskEvent::Failed { + error: v5::ResponseError::from(error), + timestamp, + } + } + }) + .collect(), + }; + + (task, content_file) + }) + })) + } + + pub fn keys(&mut self) -> Box> + '_> { + let keys = match self { + CompatV4ToV5::V4(v4) => v4.keys(), + CompatV4ToV5::Compat(compat) => compat.keys(), + }; + Box::new(keys.map(|key| { + key.map(|key| v5::Key { + description: key.description, + name: None, + uid: v5::keys::KeyId::new_v4(), + actions: key.actions.into_iter().filter_map(|action| action.into()).collect(), + indexes: key + .indexes + .into_iter() + .map(|index| match index.as_str() { + "*" => v5::StarOr::Star, + _ => v5::StarOr::Other(v5::meta::IndexUid(index)), + }) + .collect(), + expires_at: key.expires_at, + created_at: key.created_at, + updated_at: key.updated_at, + }) + })) + } +} + +pub enum CompatIndexV4ToV5 { + V4(v4::V4IndexReader), + Compat(CompatIndexV3ToV4), +} + +impl From for CompatIndexV4ToV5 { + fn from(index_reader: v4::V4IndexReader) 
-> Self { + Self::V4(index_reader) + } +} + +impl From for CompatIndexV4ToV5 { + fn from(index_reader: CompatIndexV3ToV4) -> Self { + Self::Compat(index_reader) + } +} + +impl CompatIndexV4ToV5 { + pub fn metadata(&self) -> &crate::IndexMetadata { + match self { + CompatIndexV4ToV5::V4(v4) => v4.metadata(), + CompatIndexV4ToV5::Compat(compat) => compat.metadata(), + } + } + + pub fn documents(&mut self) -> Result> + '_>> { + match self { + CompatIndexV4ToV5::V4(v4) => v4 + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + CompatIndexV4ToV5::Compat(compat) => compat + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + } + } + + pub fn settings(&mut self) -> Result> { + match self { + CompatIndexV4ToV5::V4(v4) => Ok(v5::Settings::from(v4.settings()?).check()), + CompatIndexV4ToV5::Compat(compat) => Ok(v5::Settings::from(compat.settings()?).check()), + } + } +} + +impl From> for v5::Setting { + fn from(setting: v4::Setting) -> Self { + match setting { + v4::Setting::Set(t) => v5::Setting::Set(t), + v4::Setting::Reset => v5::Setting::Reset, + v4::Setting::NotSet => v5::Setting::NotSet, + } + } +} + +impl From for v5::ResponseError { + fn from(error: v4::ResponseError) -> Self { + let code = match error.error_code.as_ref() { + "index_creation_failed" => v5::Code::CreateIndex, + "index_already_exists" => v5::Code::IndexAlreadyExists, + "index_not_found" => v5::Code::IndexNotFound, + "invalid_index_uid" => v5::Code::InvalidIndexUid, + "invalid_min_word_length_for_typo" => v5::Code::InvalidMinWordLengthForTypo, + "invalid_state" => v5::Code::InvalidState, + "primary_key_inference_failed" => v5::Code::MissingPrimaryKey, + "index_primary_key_already_exists" => v5::Code::PrimaryKeyAlreadyPresent, + "max_fields_limit_exceeded" => v5::Code::MaxFieldsLimitExceeded, + "missing_document_id" => v5::Code::MissingDocumentId, + "invalid_document_id" => v5::Code::InvalidDocumentId, + "invalid_filter" => v5::Code::Filter, + "invalid_sort" => v5::Code::Sort, + "bad_parameter" => v5::Code::BadParameter, + "bad_request" => v5::Code::BadRequest, + "database_size_limit_reached" => v5::Code::DatabaseSizeLimitReached, + "document_not_found" => v5::Code::DocumentNotFound, + "internal" => v5::Code::Internal, + "invalid_geo_field" => v5::Code::InvalidGeoField, + "invalid_ranking_rule" => v5::Code::InvalidRankingRule, + "invalid_store_file" => v5::Code::InvalidStore, + "invalid_api_key" => v5::Code::InvalidToken, + "missing_authorization_header" => v5::Code::MissingAuthorizationHeader, + "no_space_left_on_device" => v5::Code::NoSpaceLeftOnDevice, + "dump_not_found" => v5::Code::DumpNotFound, + "task_not_found" => v5::Code::TaskNotFound, + "payload_too_large" => v5::Code::PayloadTooLarge, + "unretrievable_document" => v5::Code::RetrieveDocument, + "search_error" => v5::Code::SearchDocuments, + "unsupported_media_type" => v5::Code::UnsupportedMediaType, + "dump_already_processing" => v5::Code::DumpAlreadyInProgress, + "dump_process_failed" => v5::Code::DumpProcessFailed, + "invalid_content_type" => v5::Code::InvalidContentType, + "missing_content_type" => v5::Code::MissingContentType, + "malformed_payload" => v5::Code::MalformedPayload, + "missing_payload" => v5::Code::MissingPayload, + "api_key_not_found" => v5::Code::ApiKeyNotFound, + "missing_parameter" => v5::Code::MissingParameter, + "invalid_api_key_actions" => v5::Code::InvalidApiKeyActions, + "invalid_api_key_indexes" => v5::Code::InvalidApiKeyIndexes, + "invalid_api_key_expires_at" => v5::Code::InvalidApiKeyExpiresAt, + 
"invalid_api_key_description" => v5::Code::InvalidApiKeyDescription, + other => { + log::warn!("Unknown error code {}", other); + v5::Code::UnretrievableErrorCode + } + }; + v5::ResponseError::from_msg(error.message, code) + } +} + +impl From> for v5::Settings { + fn from(settings: v4::Settings) -> Self { + v5::Settings { + displayed_attributes: settings.displayed_attributes.into(), + searchable_attributes: settings.searchable_attributes.into(), + filterable_attributes: settings.filterable_attributes.into(), + sortable_attributes: settings.sortable_attributes.into(), + ranking_rules: settings.ranking_rules.into(), + stop_words: settings.stop_words.into(), + synonyms: settings.synonyms.into(), + distinct_attribute: settings.distinct_attribute.into(), + typo_tolerance: match settings.typo_tolerance { + v4::Setting::Set(typo) => v5::Setting::Set(v5::TypoTolerance { + enabled: typo.enabled.into(), + min_word_size_for_typos: match typo.min_word_size_for_typos { + v4::Setting::Set(t) => v5::Setting::Set(v5::MinWordSizeForTypos { + one_typo: t.one_typo.into(), + two_typos: t.two_typos.into(), + }), + v4::Setting::Reset => v5::Setting::Reset, + v4::Setting::NotSet => v5::Setting::NotSet, + }, + disable_on_words: typo.disable_on_words.into(), + disable_on_attributes: typo.disable_on_attributes.into(), + }), + v4::Setting::Reset => v5::Setting::Reset, + v4::Setting::NotSet => v5::Setting::NotSet, + }, + faceting: v5::Setting::NotSet, + pagination: v5::Setting::NotSet, + _kind: std::marker::PhantomData, + } + } +} + +impl From for Option { + fn from(key: v4::Action) -> Self { + match key { + v4::Action::All => Some(v5::Action::All), + v4::Action::Search => Some(v5::Action::Search), + v4::Action::DocumentsAdd => Some(v5::Action::DocumentsAdd), + v4::Action::DocumentsGet => Some(v5::Action::DocumentsGet), + v4::Action::DocumentsDelete => Some(v5::Action::DocumentsDelete), + v4::Action::IndexesAdd => Some(v5::Action::IndexesAdd), + v4::Action::IndexesGet => Some(v5::Action::IndexesGet), + v4::Action::IndexesUpdate => Some(v5::Action::IndexesUpdate), + v4::Action::IndexesDelete => Some(v5::Action::IndexesDelete), + v4::Action::TasksGet => Some(v5::Action::TasksGet), + v4::Action::SettingsGet => Some(v5::Action::SettingsGet), + v4::Action::SettingsUpdate => Some(v5::Action::SettingsUpdate), + v4::Action::StatsGet => Some(v5::Action::StatsGet), + v4::Action::DumpsCreate => Some(v5::Action::DumpsCreate), + v4::Action::DumpsGet => None, + v4::Action::Version => Some(v5::Action::Version), + } + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn compat_v4_v5() { + let dump = File::open("tests/assets/v4.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = v4::V4Reader::open(dir).unwrap().to_v5(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ed9a30cded4c046ef46f7cff7450347e"); 
+ assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = dump.keys().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"1384361d734fd77c23804c9696228660"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"26947283836ee4cdf0974f82efcc5332"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"156871410d17e23803d0c90ddc6a66cb"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"69c9916142612cf4a2da9b9ed9455e9e"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs new file mode 100644 index 000000000..c95211abc --- /dev/null +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -0,0 +1,488 @@ +use super::v4_to_v5::{CompatIndexV4ToV5, CompatV4ToV5}; +use crate::reader::{v5, v6, Document, UpdateFile}; +use crate::Result; + +pub enum CompatV5ToV6 { + V5(v5::V5Reader), + Compat(CompatV4ToV5), +} + +impl CompatV5ToV6 { + pub fn new_v5(v5: v5::V5Reader) -> CompatV5ToV6 { + CompatV5ToV6::V5(v5) + } + + pub fn version(&self) -> crate::Version { + match self { + CompatV5ToV6::V5(v5) => v5.version(), + CompatV5ToV6::Compat(compat) => compat.version(), + } + } + + pub fn date(&self) -> Option { + match self { + CompatV5ToV6::V5(v5) => v5.date(), + CompatV5ToV6::Compat(compat) => compat.date(), + } + } + + pub fn instance_uid(&self) -> Result> { + match self { + CompatV5ToV6::V5(v5) => v5.instance_uid(), + CompatV5ToV6::Compat(compat) => compat.instance_uid(), + } + } + + pub fn indexes(&self) -> Result> + '_>> { + let indexes = match self { + CompatV5ToV6::V5(v5) => { + Box::new(v5.indexes()?.map(|index| index.map(CompatIndexV5ToV6::from))) + as 
Box> + '_> + } + + CompatV5ToV6::Compat(compat) => { + Box::new(compat.indexes()?.map(|index| index.map(CompatIndexV5ToV6::from))) + as Box> + '_> + } + }; + Ok(indexes) + } + + pub fn tasks( + &mut self, + ) -> Result>)>> + '_>> { + let instance_uid = self.instance_uid().ok().flatten(); + let keys = self.keys()?.collect::>>()?; + + let tasks = match self { + CompatV5ToV6::V5(v5) => v5.tasks(), + CompatV5ToV6::Compat(compat) => compat.tasks(), + }; + Ok(Box::new(tasks.map(move |task| { + task.map(|(task, content_file)| { + let mut task_view: v5::tasks::TaskView = task.clone().into(); + + if task_view.status == v5::Status::Processing { + task_view.started_at = None; + } + + let task = v6::Task { + uid: task_view.uid, + index_uid: task_view.index_uid, + status: match task_view.status { + v5::Status::Enqueued => v6::Status::Enqueued, + v5::Status::Processing => v6::Status::Enqueued, + v5::Status::Succeeded => v6::Status::Succeeded, + v5::Status::Failed => v6::Status::Failed, + }, + kind: match task.content { + v5::tasks::TaskContent::IndexCreation { primary_key, .. } => { + v6::Kind::IndexCreation { primary_key } + } + v5::tasks::TaskContent::IndexUpdate { primary_key, .. } => { + v6::Kind::IndexUpdate { primary_key } + } + v5::tasks::TaskContent::IndexDeletion { .. } => v6::Kind::IndexDeletion, + v5::tasks::TaskContent::DocumentAddition { + merge_strategy, + allow_index_creation, + primary_key, + documents_count, + .. + } => v6::Kind::DocumentImport { + primary_key, + documents_count: documents_count as u64, + method: match merge_strategy { + v5::tasks::IndexDocumentsMethod::ReplaceDocuments => { + v6::milli::update::IndexDocumentsMethod::ReplaceDocuments + } + v5::tasks::IndexDocumentsMethod::UpdateDocuments => { + v6::milli::update::IndexDocumentsMethod::UpdateDocuments + } + }, + allow_index_creation, + }, + v5::tasks::TaskContent::DocumentDeletion { deletion, .. } => match deletion + { + v5::tasks::DocumentDeletion::Clear => v6::Kind::DocumentClear, + v5::tasks::DocumentDeletion::Ids(documents_ids) => { + v6::Kind::DocumentDeletion { documents_ids } + } + }, + v5::tasks::TaskContent::SettingsUpdate { + allow_index_creation, + is_deletion, + settings, + .. 
+ } => v6::Kind::Settings { + is_deletion, + allow_index_creation, + settings: Box::new(settings.into()), + }, + v5::tasks::TaskContent::Dump { uid: _ } => { + // in v6 we compute the dump_uid from the started_at processing time + v6::Kind::DumpCreation { keys: keys.clone(), instance_uid } + } + }, + canceled_by: None, + details: task_view.details.map(|details| match details { + v5::Details::DocumentAddition { received_documents, indexed_documents } => { + v6::Details::DocumentAdditionOrUpdate { + received_documents: received_documents as u64, + indexed_documents, + } + } + v5::Details::Settings { settings } => { + v6::Details::SettingsUpdate { settings: Box::new(settings.into()) } + } + v5::Details::IndexInfo { primary_key } => { + v6::Details::IndexInfo { primary_key } + } + v5::Details::DocumentDeletion { + received_document_ids, + deleted_documents, + } => v6::Details::DocumentDeletion { + provided_ids: received_document_ids, + deleted_documents, + }, + v5::Details::ClearAll { deleted_documents } => { + v6::Details::ClearAll { deleted_documents } + } + v5::Details::Dump { dump_uid } => { + v6::Details::Dump { dump_uid: Some(dump_uid) } + } + }), + error: task_view.error.map(|e| e.into()), + enqueued_at: task_view.enqueued_at, + started_at: task_view.started_at, + finished_at: task_view.finished_at, + }; + + (task, content_file) + }) + }))) + } + + pub fn keys(&mut self) -> Result> + '_>> { + let keys = match self { + CompatV5ToV6::V5(v5) => v5.keys()?, + CompatV5ToV6::Compat(compat) => compat.keys(), + }; + + Ok(Box::new(keys.map(|key| { + key.map(|key| v6::Key { + description: key.description, + name: key.name, + uid: key.uid, + actions: key.actions.into_iter().map(|action| action.into()).collect(), + indexes: key + .indexes + .into_iter() + .map(|index| match index { + v5::StarOr::Star => v6::StarOr::Star, + v5::StarOr::Other(uid) => { + v6::StarOr::Other(v6::IndexUid::new_unchecked(uid.as_str())) + } + }) + .collect(), + expires_at: key.expires_at, + created_at: key.created_at, + updated_at: key.updated_at, + }) + }))) + } +} + +pub enum CompatIndexV5ToV6 { + V5(v5::V5IndexReader), + Compat(CompatIndexV4ToV5), +} + +impl From for CompatIndexV5ToV6 { + fn from(index_reader: v5::V5IndexReader) -> Self { + Self::V5(index_reader) + } +} + +impl From for CompatIndexV5ToV6 { + fn from(index_reader: CompatIndexV4ToV5) -> Self { + Self::Compat(index_reader) + } +} + +impl CompatIndexV5ToV6 { + pub fn new_v5(v5: v5::V5IndexReader) -> CompatIndexV5ToV6 { + CompatIndexV5ToV6::V5(v5) + } + + pub fn metadata(&self) -> &crate::IndexMetadata { + match self { + CompatIndexV5ToV6::V5(v5) => v5.metadata(), + CompatIndexV5ToV6::Compat(compat) => compat.metadata(), + } + } + + pub fn documents(&mut self) -> Result> + '_>> { + match self { + CompatIndexV5ToV6::V5(v5) => v5 + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + CompatIndexV5ToV6::Compat(compat) => compat + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + } + } + + pub fn settings(&mut self) -> Result> { + match self { + CompatIndexV5ToV6::V5(v5) => Ok(v6::Settings::from(v5.settings()?).check()), + CompatIndexV5ToV6::Compat(compat) => Ok(v6::Settings::from(compat.settings()?).check()), + } + } +} + +impl From> for v6::Setting { + fn from(setting: v5::Setting) -> Self { + match setting { + v5::Setting::Set(t) => v6::Setting::Set(t), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + } + } +} + +impl From for v6::ResponseError { + fn from(error: v5::ResponseError) -> Self { + let 
code = match error.error_code.as_ref() { + "index_creation_failed" => v6::Code::CreateIndex, + "index_already_exists" => v6::Code::IndexAlreadyExists, + "index_not_found" => v6::Code::IndexNotFound, + "invalid_index_uid" => v6::Code::InvalidIndexUid, + "invalid_min_word_length_for_typo" => v6::Code::InvalidMinWordLengthForTypo, + "invalid_state" => v6::Code::InvalidState, + "primary_key_inference_failed" => v6::Code::MissingPrimaryKey, + "index_primary_key_already_exists" => v6::Code::PrimaryKeyAlreadyPresent, + "max_fields_limit_exceeded" => v6::Code::MaxFieldsLimitExceeded, + "missing_document_id" => v6::Code::MissingDocumentId, + "invalid_document_id" => v6::Code::InvalidDocumentId, + "invalid_filter" => v6::Code::Filter, + "invalid_sort" => v6::Code::Sort, + "bad_parameter" => v6::Code::BadParameter, + "bad_request" => v6::Code::BadRequest, + "database_size_limit_reached" => v6::Code::DatabaseSizeLimitReached, + "document_not_found" => v6::Code::DocumentNotFound, + "internal" => v6::Code::Internal, + "invalid_geo_field" => v6::Code::InvalidGeoField, + "invalid_ranking_rule" => v6::Code::InvalidRankingRule, + "invalid_store_file" => v6::Code::InvalidStore, + "invalid_api_key" => v6::Code::InvalidToken, + "missing_authorization_header" => v6::Code::MissingAuthorizationHeader, + "no_space_left_on_device" => v6::Code::NoSpaceLeftOnDevice, + "dump_not_found" => v6::Code::DumpNotFound, + "task_not_found" => v6::Code::TaskNotFound, + "payload_too_large" => v6::Code::PayloadTooLarge, + "unretrievable_document" => v6::Code::RetrieveDocument, + "search_error" => v6::Code::SearchDocuments, + "unsupported_media_type" => v6::Code::UnsupportedMediaType, + "dump_already_processing" => v6::Code::DumpAlreadyInProgress, + "dump_process_failed" => v6::Code::DumpProcessFailed, + "invalid_content_type" => v6::Code::InvalidContentType, + "missing_content_type" => v6::Code::MissingContentType, + "malformed_payload" => v6::Code::MalformedPayload, + "missing_payload" => v6::Code::MissingPayload, + "api_key_not_found" => v6::Code::ApiKeyNotFound, + "missing_parameter" => v6::Code::MissingParameter, + "invalid_api_key_actions" => v6::Code::InvalidApiKeyActions, + "invalid_api_key_indexes" => v6::Code::InvalidApiKeyIndexes, + "invalid_api_key_expires_at" => v6::Code::InvalidApiKeyExpiresAt, + "invalid_api_key_description" => v6::Code::InvalidApiKeyDescription, + "invalid_api_key_name" => v6::Code::InvalidApiKeyName, + "invalid_api_key_uid" => v6::Code::InvalidApiKeyUid, + "immutable_field" => v6::Code::ImmutableField, + "api_key_already_exists" => v6::Code::ApiKeyAlreadyExists, + other => { + log::warn!("Unknown error code {}", other); + v6::Code::UnretrievableErrorCode + } + }; + v6::ResponseError::from_msg(error.message, code) + } +} + +impl From> for v6::Settings { + fn from(settings: v5::Settings) -> Self { + v6::Settings { + displayed_attributes: settings.displayed_attributes.into(), + searchable_attributes: settings.searchable_attributes.into(), + filterable_attributes: settings.filterable_attributes.into(), + sortable_attributes: settings.sortable_attributes.into(), + ranking_rules: settings.ranking_rules.into(), + stop_words: settings.stop_words.into(), + synonyms: settings.synonyms.into(), + distinct_attribute: settings.distinct_attribute.into(), + typo_tolerance: match settings.typo_tolerance { + v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance { + enabled: typo.enabled.into(), + min_word_size_for_typos: match typo.min_word_size_for_typos { + v5::Setting::Set(t) => 
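+ // the nested value is itself a v5::Setting, so its Set / Reset / NotSet state
+ // is preserved one level down and only the payload struct is swapped for the v6 type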
v6::Setting::Set(v6::MinWordSizeForTypos { + one_typo: t.one_typo.into(), + two_typos: t.two_typos.into(), + }), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + }, + disable_on_words: typo.disable_on_words.into(), + disable_on_attributes: typo.disable_on_attributes.into(), + }), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + }, + faceting: match settings.faceting { + v5::Setting::Set(faceting) => v6::Setting::Set(v6::FacetingSettings { + max_values_per_facet: faceting.max_values_per_facet.into(), + }), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + }, + pagination: match settings.pagination { + v5::Setting::Set(pagination) => v6::Setting::Set(v6::PaginationSettings { + max_total_hits: pagination.max_total_hits.into(), + }), + v5::Setting::Reset => v6::Setting::Reset, + v5::Setting::NotSet => v6::Setting::NotSet, + }, + _kind: std::marker::PhantomData, + } + } +} + +impl From for v6::Action { + fn from(key: v5::Action) -> Self { + match key { + v5::Action::All => v6::Action::All, + v5::Action::Search => v6::Action::Search, + v5::Action::DocumentsAll => v6::Action::DocumentsAll, + v5::Action::DocumentsAdd => v6::Action::DocumentsAdd, + v5::Action::DocumentsGet => v6::Action::DocumentsGet, + v5::Action::DocumentsDelete => v6::Action::DocumentsDelete, + v5::Action::IndexesAll => v6::Action::IndexesAll, + v5::Action::IndexesAdd => v6::Action::IndexesAdd, + v5::Action::IndexesGet => v6::Action::IndexesGet, + v5::Action::IndexesUpdate => v6::Action::IndexesUpdate, + v5::Action::IndexesDelete => v6::Action::IndexesDelete, + v5::Action::TasksAll => v6::Action::TasksAll, + v5::Action::TasksGet => v6::Action::TasksGet, + v5::Action::SettingsAll => v6::Action::SettingsAll, + v5::Action::SettingsGet => v6::Action::SettingsGet, + v5::Action::SettingsUpdate => v6::Action::SettingsUpdate, + v5::Action::StatsAll => v6::Action::StatsAll, + v5::Action::StatsGet => v6::Action::StatsGet, + v5::Action::MetricsAll => v6::Action::MetricsAll, + v5::Action::MetricsGet => v6::Action::MetricsGet, + v5::Action::DumpsAll => v6::Action::DumpsAll, + v5::Action::DumpsCreate => v6::Action::DumpsCreate, + v5::Action::Version => v6::Action::Version, + v5::Action::KeysAdd => v6::Action::KeysAdd, + v5::Action::KeysGet => v6::Action::KeysGet, + v5::Action::KeysUpdate => v6::Action::KeysUpdate, + v5::Action::KeysDelete => v6::Action::KeysDelete, + } + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn compat_v5_v6() { + let dump = File::open("tests/assets/v5.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = v5::V5Reader::open(dir).unwrap().to_v6(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"42d4200cf6d92a6449989ca48cd8e28a"); + 
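+ // Each task is paired with an optional update file: only tasks that were still
+ // enqueued when the dump was taken carry their payload. A caller that wants to
+ // re-import pending payloads can filter on that Option, e.g. (sketch, not part
+ // of this test):
+ //
+ //     let pending: Vec<_> = tasks.iter()
+ //         .zip(update_files.iter())
+ //         .filter(|(_, file)| file.is_some())
+ //         .collect();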
assert_eq!(update_files.len(), 22); + assert!(update_files[0].is_none()); // the dump creation + assert!(update_files[1].is_some()); // the enqueued document addition + assert!(update_files[2..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"c9d2b467fe2fca0b35580d8a999808fb"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"8e5cadabf74aebe1160bf51c3d489efe"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"4894ac1e74b9e1069ed5ee262b7a1aca"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 200); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"054dbf08a79e08bb9becba6f5d090f13"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/meilisearch-lib/src/dump/error.rs b/dump/src/reader/error.rs similarity index 100% rename from meilisearch-lib/src/dump/error.rs rename to dump/src/reader/error.rs diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs new file mode 100644 index 000000000..9d0f6ae88 --- /dev/null +++ b/dump/src/reader/mod.rs @@ -0,0 +1,535 @@ +use std::fs::File; +use std::io::{BufReader, Read}; + +use flate2::bufread::GzDecoder; +use serde::Deserialize; +use tempfile::TempDir; + +use self::compat::v4_to_v5::CompatV4ToV5; +use self::compat::v5_to_v6::{CompatIndexV5ToV6, CompatV5ToV6}; +use self::v5::V5Reader; +use self::v6::{V6IndexReader, V6Reader}; +use crate::{Error, Result, Version}; + +mod compat; + +// pub(self) mod v1; +pub(self) mod v2; +pub(self) mod v3; +pub(self) mod v4; +pub(self) mod v5; +pub(self) mod v6; + +pub type Document = serde_json::Map; +pub type UpdateFile = dyn Iterator>; + +pub enum DumpReader { + Current(V6Reader), + Compat(CompatV5ToV6), +} + +impl DumpReader { + pub fn open(dump: impl Read) -> Result { + let path = TempDir::new()?; + let mut dump = BufReader::new(dump); + let gz = 
GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(path.path())?; + + #[derive(Deserialize)] + #[serde(rename_all = "camelCase")] + struct MetadataVersion { + pub dump_version: Version, + } + let mut meta_file = File::open(path.path().join("metadata.json"))?; + let MetadataVersion { dump_version } = serde_json::from_reader(&mut meta_file)?; + + match dump_version { + // Version::V1 => Ok(Box::new(v1::Reader::open(path)?)), + Version::V1 => Err(Error::DumpV1Unsupported), + Version::V2 => Ok(v2::V2Reader::open(path)?.to_v3().to_v4().to_v5().to_v6().into()), + Version::V3 => Ok(v3::V3Reader::open(path)?.to_v4().to_v5().to_v6().into()), + Version::V4 => Ok(v4::V4Reader::open(path)?.to_v5().to_v6().into()), + Version::V5 => Ok(v5::V5Reader::open(path)?.to_v6().into()), + Version::V6 => Ok(v6::V6Reader::open(path)?.into()), + } + } + + pub fn version(&self) -> crate::Version { + match self { + DumpReader::Current(current) => current.version(), + DumpReader::Compat(compat) => compat.version(), + } + } + + pub fn date(&self) -> Option { + match self { + DumpReader::Current(current) => current.date(), + DumpReader::Compat(compat) => compat.date(), + } + } + + pub fn instance_uid(&self) -> Result> { + match self { + DumpReader::Current(current) => current.instance_uid(), + DumpReader::Compat(compat) => compat.instance_uid(), + } + } + + pub fn indexes(&self) -> Result> + '_>> { + match self { + DumpReader::Current(current) => { + let indexes = Box::new(current.indexes()?.map(|res| res.map(DumpIndexReader::from))) + as Box> + '_>; + Ok(indexes) + } + DumpReader::Compat(compat) => { + let indexes = Box::new(compat.indexes()?.map(|res| res.map(DumpIndexReader::from))) + as Box> + '_>; + Ok(indexes) + } + } + } + + pub fn tasks( + &mut self, + ) -> Result>)>> + '_>> { + match self { + DumpReader::Current(current) => Ok(current.tasks()), + DumpReader::Compat(compat) => compat.tasks(), + } + } + + pub fn keys(&mut self) -> Result> + '_>> { + match self { + DumpReader::Current(current) => Ok(current.keys()), + DumpReader::Compat(compat) => compat.keys(), + } + } +} + +impl From for DumpReader { + fn from(value: V6Reader) -> Self { + DumpReader::Current(value) + } +} + +impl From for DumpReader { + fn from(value: CompatV5ToV6) -> Self { + DumpReader::Compat(value) + } +} + +impl From for DumpReader { + fn from(value: V5Reader) -> Self { + DumpReader::Compat(value.to_v6()) + } +} + +impl From for DumpReader { + fn from(value: CompatV4ToV5) -> Self { + DumpReader::Compat(value.to_v6()) + } +} + +pub enum DumpIndexReader { + Current(v6::V6IndexReader), + Compat(Box), +} + +impl DumpIndexReader { + pub fn new_v6(v6: v6::V6IndexReader) -> DumpIndexReader { + DumpIndexReader::Current(v6) + } + + pub fn metadata(&self) -> &crate::IndexMetadata { + match self { + DumpIndexReader::Current(v6) => v6.metadata(), + DumpIndexReader::Compat(compat) => compat.metadata(), + } + } + + pub fn documents(&mut self) -> Result> + '_>> { + match self { + DumpIndexReader::Current(v6) => v6 + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + DumpIndexReader::Compat(compat) => compat + .documents() + .map(|iter| Box::new(iter) as Box> + '_>), + } + } + + pub fn settings(&mut self) -> Result> { + match self { + DumpIndexReader::Current(v6) => v6.settings(), + DumpIndexReader::Compat(compat) => compat.settings(), + } + } +} + +impl From for DumpIndexReader { + fn from(value: V6IndexReader) -> Self { + DumpIndexReader::Current(value) + } +} + +impl From for DumpIndexReader { + fn 
from(value: CompatIndexV5ToV6) -> Self { + DumpIndexReader::Compat(Box::new(value)) + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + + use meili_snap::insta; + + use super::*; + + #[test] + #[ignore] + fn import_dump_v5() { + let dump = File::open("tests/assets/v5.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"42d4200cf6d92a6449989ca48cd8e28a"); + assert_eq!(update_files.len(), 22); + assert!(update_files[0].is_none()); // the dump creation + assert!(update_files[1].is_some()); // the enqueued document addition + assert!(update_files[2..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"c9d2b467fe2fca0b35580d8a999808fb"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"8e5cadabf74aebe1160bf51c3d489efe"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"4894ac1e74b9e1069ed5ee262b7a1aca"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 200); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"054dbf08a79e08bb9becba6f5d090f13"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } + + #[test] + #[ignore] + fn import_dump_v4() { + let dump = File::open("tests/assets/v4.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); + 
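+ // DumpReader::open detects the version from metadata.json, so this v4 dump goes
+ // through the v4 -> v5 -> v6 compat chain transparently and the assertions below
+ // use exactly the same API as the v5 test above. Minimal open-and-read sketch
+ // (not part of this test, assumes a caller returning Result):
+ //
+ //     let dump = DumpReader::open(File::open("tests/assets/v4.dump")?)?;
+ //     for index in dump.indexes()? {
+ //         let index = index?;
+ //         println!("{}", index.metadata().uid);
+ //     }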
insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"491e244a80a19fe2a900b809d310c24a"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"d751713988987e9331980363e24189ce"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"1f9da51a4518166fb440def5437eafdb"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"488816aba82c1bd65f1609630055c611"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"7b4f66dad597dc651650f35fe34be27f"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } + + #[test] + #[ignore] + fn import_dump_v3() { + let dump = File::open("tests/assets/v3.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); + assert_eq!(dump.instance_uid().unwrap(), None); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"7cacce2e21702be696b866808c726946"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let 
keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"855f3165dec609b919171ff83f82b364"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"43e0bf1746c3ea1d64c1e10ea544c190"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"5fd06a5038f49311600379d43412b655"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"5fd06a5038f49311600379d43412b655"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } + + #[test] + #[ignore] + fn import_dump_v2() { + let dump = File::open("tests/assets/v2.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); + assert_eq!(dump.instance_uid().unwrap(), None); + + // tasks + let tasks = dump.tasks().unwrap().collect::>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6cabec4e252b74c8f3a2c8517622e85f"); + assert_eq!(update_files.len(), 9); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + // keys + let keys = 
dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"b15b71f56dd082d8e8ec5182e688bf36"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"5389153ddf5527fa79c54b6a6e9c21f6"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"8aebab01301d266acf3e18dd449c008f"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"8aebab01301d266acf3e18dd449c008f"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v1/mod.rs b/dump/src/reader/v1/mod.rs new file mode 100644 index 000000000..f638262cc --- /dev/null +++ b/dump/src/reader/v1/mod.rs @@ -0,0 +1,173 @@ +use std::{ + convert::Infallible, + fs::{self, File}, + io::{BufRead, BufReader}, + path::Path, +}; + +use tempfile::TempDir; +use time::OffsetDateTime; + +use self::update::UpdateStatus; + +use super::{DumpReader, IndexReader}; +use crate::{Error, Result, Version}; + +pub mod settings; +pub mod update; +pub mod v1; + +pub struct V1Reader { + dump: TempDir, + metadata: v1::Metadata, + indexes: Vec, +} + +struct V1IndexReader { + name: String, + documents: BufReader, + settings: BufReader, + updates: BufReader, + + current_update: Option, +} + +impl V1IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let mut ret = 
V1IndexReader { + name, + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + settings: BufReader::new(File::open(path.join("settings.json"))?), + updates: BufReader::new(File::open(path.join("updates.jsonl"))?), + current_update: None, + }; + ret.next_update(); + + Ok(ret) + } + + pub fn next_update(&mut self) -> Result> { + let current_update = if let Some(line) = self.updates.lines().next() { + Some(serde_json::from_str(&line?)?) + } else { + None + }; + + Ok(std::mem::replace(&mut self.current_update, current_update)) + } +} + +impl V1Reader { + pub fn open(dump: TempDir) -> Result { + let mut meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + + let mut indexes = Vec::new(); + + let entries = fs::read_dir(dump.path())?; + for entry in entries { + let entry = entry?; + if entry.file_type()?.is_dir() { + indexes.push(V1IndexReader::new( + entry + .file_name() + .to_str() + .ok_or(Error::BadIndexName)? + .to_string(), + &entry.path(), + )?); + } + } + + Ok(V1Reader { + dump, + metadata, + indexes, + }) + } + + fn next_update(&mut self) -> Result> { + if let Some((idx, _)) = self + .indexes + .iter() + .map(|index| index.current_update) + .enumerate() + .filter_map(|(idx, update)| update.map(|u| (idx, u))) + .min_by_key(|(_, update)| update.enqueued_at()) + { + self.indexes[idx].next_update() + } else { + Ok(None) + } + } +} + +impl IndexReader for &V1IndexReader { + type Document = serde_json::Map; + type Settings = settings::Settings; + + fn name(&self) -> &str { + todo!() + } + + fn documents(&self) -> Result>>> { + todo!() + } + + fn settings(&self) -> Result { + todo!() + } +} + +impl DumpReader for V1Reader { + type Document = serde_json::Map; + type Settings = settings::Settings; + + type Task = update::UpdateStatus; + type UpdateFile = Infallible; + + type Key = Infallible; + + fn date(&self) -> Option { + None + } + + fn version(&self) -> Version { + Version::V1 + } + + fn indexes( + &self, + ) -> Result< + Box< + dyn Iterator< + Item = Result< + Box< + dyn super::IndexReader< + Document = Self::Document, + Settings = Self::Settings, + >, + >, + >, + >, + >, + > { + Ok(Box::new(self.indexes.iter().map(|index| { + let index = Box::new(index) + as Box>; + Ok(index) + }))) + } + + fn tasks(&self) -> Box)>>> { + Box::new(std::iter::from_fn(|| { + self.next_update() + .transpose() + .map(|result| result.map(|task| (task, None))) + })) + } + + fn keys(&self) -> Box>> { + Box::new(std::iter::empty()) + } +} diff --git a/dump/src/reader/v1/settings.rs b/dump/src/reader/v1/settings.rs new file mode 100644 index 000000000..0065d3f97 --- /dev/null +++ b/dump/src/reader/v1/settings.rs @@ -0,0 +1,63 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::result::Result as StdResult; + +use serde::{Deserialize, Deserializer, Serialize}; + +#[derive(Default, Clone, Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct Settings { + #[serde(default, deserialize_with = "deserialize_some")] + pub ranking_rules: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub distinct_attribute: Option>, + #[serde(default, deserialize_with = "deserialize_some")] + pub searchable_attributes: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub displayed_attributes: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub stop_words: Option>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub synonyms: 
Option>>>, + #[serde(default, deserialize_with = "deserialize_some")] + pub attributes_for_faceting: Option>>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SettingsUpdate { + pub ranking_rules: UpdateState>, + pub distinct_attribute: UpdateState, + pub primary_key: UpdateState, + pub searchable_attributes: UpdateState>, + pub displayed_attributes: UpdateState>, + pub stop_words: UpdateState>, + pub synonyms: UpdateState>>, + pub attributes_for_faceting: UpdateState>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum UpdateState { + Update(T), + Clear, + Nothing, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum RankingRule { + Typo, + Words, + Proximity, + Attribute, + WordsPosition, + Exactness, + Asc(String), + Desc(String), +} + +// Any value that is present is considered Some value, including null. +fn deserialize_some<'de, T, D>(deserializer: D) -> StdResult, D::Error> +where + T: Deserialize<'de>, + D: Deserializer<'de>, +{ + Deserialize::deserialize(deserializer).map(Some) +} diff --git a/dump/src/reader/v1/update.rs b/dump/src/reader/v1/update.rs new file mode 100644 index 000000000..c9ccaf309 --- /dev/null +++ b/dump/src/reader/v1/update.rs @@ -0,0 +1,120 @@ +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use time::OffsetDateTime; + +use super::settings::SettingsUpdate; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Update { + data: UpdateData, + #[serde(with = "time::serde::rfc3339")] + enqueued_at: OffsetDateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum UpdateData { + ClearAll, + Customs(Vec), + // (primary key, documents) + DocumentsAddition { + primary_key: Option, + documents: Vec>, + }, + DocumentsPartial { + primary_key: Option, + documents: Vec>, + }, + DocumentsDeletion(Vec), + Settings(Box), +} + +impl UpdateData { + pub fn update_type(&self) -> UpdateType { + match self { + UpdateData::ClearAll => UpdateType::ClearAll, + UpdateData::Customs(_) => UpdateType::Customs, + UpdateData::DocumentsAddition { documents, .. } => UpdateType::DocumentsAddition { + number: documents.len(), + }, + UpdateData::DocumentsPartial { documents, .. 
} => UpdateType::DocumentsPartial { + number: documents.len(), + }, + UpdateData::DocumentsDeletion(deletion) => UpdateType::DocumentsDeletion { + number: deletion.len(), + }, + UpdateData::Settings(update) => UpdateType::Settings { + settings: update.clone(), + }, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "name")] +pub enum UpdateType { + ClearAll, + Customs, + DocumentsAddition { number: usize }, + DocumentsPartial { number: usize }, + DocumentsDeletion { number: usize }, + Settings { settings: Box }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct ProcessedUpdateResult { + pub update_id: u64, + #[serde(rename = "type")] + pub update_type: UpdateType, + #[serde(skip_serializing_if = "Option::is_none")] + pub error: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error_type: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error_code: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub error_link: Option, + pub duration: f64, // in seconds + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub processed_at: OffsetDateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct EnqueuedUpdateResult { + pub update_id: u64, + #[serde(rename = "type")] + pub update_type: UpdateType, + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase", tag = "status")] +pub enum UpdateStatus { + Enqueued { + #[serde(flatten)] + content: EnqueuedUpdateResult, + }, + Failed { + #[serde(flatten)] + content: ProcessedUpdateResult, + }, + Processed { + #[serde(flatten)] + content: ProcessedUpdateResult, + }, +} + +impl UpdateStatus { + pub fn enqueued_at(&self) -> &OffsetDateTime { + match self { + UpdateStatus::Enqueued { content } => &content.enqueued_at, + UpdateStatus::Failed { content } | UpdateStatus::Processed { content } => { + &content.enqueued_at + } + } + } +} diff --git a/dump/src/reader/v1/v1.rs b/dump/src/reader/v1/v1.rs new file mode 100644 index 000000000..0f4312508 --- /dev/null +++ b/dump/src/reader/v1/v1.rs @@ -0,0 +1,22 @@ +use serde::Deserialize; +use time::OffsetDateTime; + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Index { + pub name: String, + pub uid: String, + #[serde(with = "time::serde::rfc3339")] + created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + updated_at: OffsetDateTime, + pub primary_key: Option, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + indexes: Vec, + db_version: String, + dump_version: crate::Version, +} diff --git a/dump/src/reader/v2/errors.rs b/dump/src/reader/v2/errors.rs new file mode 100644 index 000000000..dc9740f90 --- /dev/null +++ b/dump/src/reader/v2/errors.rs @@ -0,0 +1,14 @@ +use http::StatusCode; +use serde::Deserialize; + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct ResponseError { + #[serde(skip)] + pub code: StatusCode, + pub message: String, + pub error_code: String, + pub error_type: String, + pub error_link: String, +} diff --git a/dump/src/reader/v2/meta.rs b/dump/src/reader/v2/meta.rs new file mode 100644 index 000000000..f83762914 --- /dev/null +++ b/dump/src/reader/v2/meta.rs @@ -0,0 +1,18 @@ +use 
serde::Deserialize; +use uuid::Uuid; + +use super::Settings; + +#[derive(Deserialize, Debug, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUuid { + pub uid: String, + pub uuid: Uuid, +} + +#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DumpMeta { + pub settings: Settings, + pub primary_key: Option, +} diff --git a/dump/src/reader/v2/mod.rs b/dump/src/reader/v2/mod.rs new file mode 100644 index 000000000..3a9e3e587 --- /dev/null +++ b/dump/src/reader/v2/mod.rs @@ -0,0 +1,310 @@ +//! ```text +//! . +//! ├── indexes +//! │   ├── index-40d14c5f-37ae-4873-9d51-b69e014a0d30 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── index-88202369-4524-4410-9b3d-3e924c867fec +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── index-b7f2d03b-bf9b-40d9-a25b-94dc5ec60c32 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   └── index-dc9070b3-572d-4f30-ab45-d4903ab71708 +//! │   ├── documents.jsonl +//! │   └── meta.json +//! ├── index_uuids +//! │   └── data.jsonl +//! ├── metadata.json +//! └── updates +//! ├── data.jsonl +//! └── update_files +//! └── update_202573df-718b-4d80-9a65-2ee397c23dc3 +//! ``` + +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use time::OffsetDateTime; + +pub mod errors; +pub mod meta; +pub mod settings; +pub mod updates; + +use self::meta::{DumpMeta, IndexUuid}; +use super::compat::v2_to_v3::CompatV2ToV3; +use super::Document; +use crate::{IndexMetadata, Result, Version}; + +pub type Settings = settings::Settings; +pub type Checked = settings::Checked; +pub type Unchecked = settings::Unchecked; + +pub type Task = updates::UpdateEntry; + +// everything related to the errors +pub type ResponseError = errors::ResponseError; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + db_version: String, + index_db_size: usize, + update_db_size: usize, + #[serde(with = "time::serde::rfc3339")] + dump_date: OffsetDateTime, +} + +pub struct V2Reader { + dump: TempDir, + metadata: Metadata, + tasks: BufReader, + pub index_uuid: Vec, +} + +impl V2Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; + let index_uuid = BufReader::new(index_uuid); + let index_uuid = index_uuid + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
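+ // one IndexUuid record per line of index_uuids/data.jsonl: the human-readable
+ // uid plus the uuid used for the index-{uuid} directory on disk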
}) + .collect::>>()?; + + Ok(V2Reader { + metadata, + tasks: BufReader::new( + File::open(dump.path().join("updates").join("data.jsonl")).unwrap(), + ), + index_uuid, + dump, + }) + } + + pub fn to_v3(self) -> CompatV2ToV3 { + CompatV2ToV3::new(self) + } + + pub fn index_uuid(&self) -> Vec { + self.index_uuid.clone() + } + + pub fn version(&self) -> Version { + Version::V2 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.index_uuid.iter().map(|index| -> Result<_> { + V2IndexReader::new( + index.uid.clone(), + &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)), + ) + })) + } + + pub fn tasks(&mut self) -> Box)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?)?; + if !task.is_finished() { + if let Some(uuid) = task.get_content_uuid() { + let update_file_path = self + .dump + .path() + .join("updates") + .join("update_files") + .join(format!("update_{}", uuid)); + Ok((task, Some(UpdateFile::new(&update_file_path)?))) + } else { + Ok((task, None)) + } + } else { + Ok((task, None)) + } + })) + } +} + +pub struct V2IndexReader { + metadata: IndexMetadata, + settings: Settings, + + documents: BufReader, +} + +impl V2IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let meta = File::open(path.join("meta.json"))?; + let meta: DumpMeta = serde_json::from_reader(meta)?; + + let metadata = IndexMetadata { + uid: name, + primary_key: meta.primary_key, + // FIXME: Iterate over the whole task queue to find the creation and last update date. + created_at: OffsetDateTime::now_utc(), + updated_at: OffsetDateTime::now_utc(), + }; + + let ret = V2IndexReader { + metadata, + settings: meta.settings.check(), + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
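+ // every line of documents.jsonl is one standalone JSON object, decoded lazily
+ // into a Document (a serde_json::Map) as the iterator is consumed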
})) + } + + pub fn settings(&mut self) -> Result> { + Ok(self.settings.clone()) + } +} + +pub struct UpdateFile { + documents: Vec, + index: usize, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + let reader = BufReader::new(File::open(path)?); + Ok(UpdateFile { documents: serde_json::from_reader(reader)?, index: 0 }) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + self.index += 1; + self.documents.get(self.index - 1).cloned().map(Ok) + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn read_dump_v2() { + let dump = File::open("tests/assets/v2.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = V2Reader::open(dir).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-09 20:27:59.904096267 +00:00:00"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ec5fc0a14bf735ad4e361d5aa8a89ac6"); + assert_eq!(update_files.len(), 9); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"c41bf7315d404da46c99b9e3a2a3cc1e"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"3d1d96c85b6bab46e957bc8d2532a910"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + 
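+ // movies_2 was dumped while still empty (null primary key, zero documents);
+ // collecting into a Vec below is only done to hash the snapshot, a consumer
+ // could just as well stream the iterator (sketch, not part of this test):
+ //
+ //     for doc in movies2.documents()? {
+ //         let _doc: Document = doc?; // one JSON object per line
+ //     }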
meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"4f04afc086828d8da0da57a7d598ddba"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"4f04afc086828d8da0da57a7d598ddba"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v2/settings.rs b/dump/src/reader/v2/settings.rs new file mode 100644 index 000000000..62e5c05f9 --- /dev/null +++ b/dump/src/reader/v2/settings.rs @@ -0,0 +1,176 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::marker::PhantomData; +use std::str::FromStr; + +use once_cell::sync::Lazy; +use regex::Regex; +use serde::{Deserialize, Deserializer}; + +#[cfg(test)] +fn serialize_with_wildcard( + field: &Option>>, + s: S, +) -> std::result::Result +where + S: serde::Serializer, +{ + let wildcard = vec!["*".to_string()]; + s.serialize_some(&field.as_ref().map(|o| o.as_ref().unwrap_or(&wildcard))) +} + +fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result, D::Error> +where + T: Deserialize<'de>, + D: Deserializer<'de>, +{ + Deserialize::deserialize(deserializer).map(Some) +} + +#[derive(Clone, Default, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Checked; +#[derive(Clone, Default, Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Unchecked; + +#[derive(Debug, Clone, Default, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))] +pub struct Settings { + #[serde( + default, + deserialize_with = "deserialize_some", + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Option::is_none" + )] + pub displayed_attributes: Option>>, + + #[serde( + default, + deserialize_with = "deserialize_some", + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Option::is_none" + )] + pub searchable_attributes: Option>>, + + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub filterable_attributes: Option>>, + + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub ranking_rules: Option>>, + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub stop_words: Option>>, + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub synonyms: Option>>>, + #[serde( + default, + deserialize_with = "deserialize_some", + skip_serializing_if = "Option::is_none" + )] + pub distinct_attribute: Option>, + + #[serde(skip)] + pub _kind: PhantomData, +} + +impl Settings { + pub fn check(mut self) -> Settings { + let displayed_attributes = match self.displayed_attributes.take() { + Some(Some(fields)) => { + if fields.iter().any(|f| f == "*") { + Some(None) + } else { + 
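+ // no "*" wildcard present: keep the explicit field list as-is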
Some(Some(fields)) + } + } + otherwise => otherwise, + }; + + let searchable_attributes = match self.searchable_attributes.take() { + Some(Some(fields)) => { + if fields.iter().any(|f| f == "*") { + Some(None) + } else { + Some(Some(fields)) + } + } + otherwise => otherwise, + }; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes: self.filterable_attributes, + ranking_rules: self.ranking_rules, + stop_words: self.stop_words, + synonyms: self.synonyms, + distinct_attribute: self.distinct_attribute, + _kind: PhantomData, + } + } +} + +static ASC_DESC_REGEX: Lazy = + Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap()); + +#[derive(Debug, Deserialize, Clone, PartialEq, Eq)] +pub enum Criterion { + /// Sorted by decreasing number of matched query terms. + /// Query words at the front of an attribute is considered better than if it was at the back. + Words, + /// Sorted by increasing number of typos. + Typo, + /// Sorted by increasing distance between matched query terms. + Proximity, + /// Documents with quey words contained in more important + /// attributes are considred better. + Attribute, + /// Sorted by the similarity of the matched words with the query words. + Exactness, + /// Sorted by the increasing value of the field specified. + Asc(String), + /// Sorted by the decreasing value of the field specified. + Desc(String), +} + +impl FromStr for Criterion { + type Err = (); + + fn from_str(txt: &str) -> Result { + match txt { + "words" => Ok(Criterion::Words), + "typo" => Ok(Criterion::Typo), + "proximity" => Ok(Criterion::Proximity), + "attribute" => Ok(Criterion::Attribute), + "exactness" => Ok(Criterion::Exactness), + text => { + let caps = ASC_DESC_REGEX.captures(text).ok_or(())?; + let order = caps.get(1).unwrap().as_str(); + let field_name = caps.get(2).unwrap().as_str(); + match order { + "asc" => Ok(Criterion::Asc(field_name.to_string())), + "desc" => Ok(Criterion::Desc(field_name.to_string())), + _text => Err(()), + } + } + } + } +} diff --git a/dump/src/reader/v2/updates.rs b/dump/src/reader/v2/updates.rs new file mode 100644 index 000000000..33d88d46f --- /dev/null +++ b/dump/src/reader/v2/updates.rs @@ -0,0 +1,230 @@ +use serde::Deserialize; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::{ResponseError, Settings, Unchecked}; + +#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct UpdateEntry { + pub uuid: Uuid, + pub update: UpdateStatus, +} + +impl UpdateEntry { + pub fn is_finished(&self) -> bool { + match self.update { + UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false, + UpdateStatus::Processed(_) | UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) => true, + } + } + + pub fn get_content_uuid(&self) -> Option<&Uuid> { + match &self.update { + UpdateStatus::Enqueued(enqueued) => enqueued.content.as_ref(), + UpdateStatus::Processing(processing) => processing.from.content.as_ref(), + UpdateStatus::Processed(processed) => processed.from.from.content.as_ref(), + UpdateStatus::Aborted(aborted) => aborted.from.content.as_ref(), + UpdateStatus::Failed(failed) => failed.from.from.content.as_ref(), + } + } +} + +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum UpdateResult { + DocumentsAddition(DocumentAdditionResult), + DocumentDeletion { deleted: u64 }, + Other, +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DocumentAdditionResult { + pub nb_documents: usize, +} + +#[derive(Debug, 
Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[non_exhaustive] +pub enum IndexDocumentsMethod { + /// Replace the previous document with the new one, + /// removing all the already known attributes. + ReplaceDocuments, + + /// Merge the previous version of the document with the new version, + /// replacing old attributes values with the new ones and add the new attributes. + UpdateDocuments, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[non_exhaustive] +pub enum UpdateFormat { + /// The given update is a real **comma seperated** CSV with headers on the first line. + Csv, + /// The given update is a JSON array with documents inside. + Json, + /// The given update is a JSON stream with a document on each line. + JsonStream, +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(tag = "type")] +pub enum UpdateMeta { + DocumentsAddition { + method: IndexDocumentsMethod, + format: UpdateFormat, + primary_key: Option, + }, + ClearDocuments, + DeleteDocuments { + ids: Vec, + }, + Settings(Settings), +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Enqueued { + pub update_id: u64, + pub meta: UpdateMeta, + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, + pub content: Option, +} + +impl Enqueued { + pub fn meta(&self) -> &UpdateMeta { + &self.meta + } + + pub fn id(&self) -> u64 { + self.update_id + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Processed { + pub success: UpdateResult, + #[serde(with = "time::serde::rfc3339")] + pub processed_at: OffsetDateTime, + #[serde(flatten)] + pub from: Processing, +} + +impl Processed { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &UpdateMeta { + self.from.meta() + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Processing { + #[serde(flatten)] + pub from: Enqueued, + #[serde(with = "time::serde::rfc3339")] + pub started_processing_at: OffsetDateTime, +} + +impl Processing { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &UpdateMeta { + self.from.meta() + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Aborted { + #[serde(flatten)] + pub from: Enqueued, + #[serde(with = "time::serde::rfc3339")] + pub aborted_at: OffsetDateTime, +} + +impl Aborted { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &UpdateMeta { + self.from.meta() + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Failed { + #[serde(flatten)] + pub from: Processing, + pub error: ResponseError, + #[serde(with = "time::serde::rfc3339")] + pub failed_at: OffsetDateTime, +} + +impl Failed { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &UpdateMeta { + self.from.meta() + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(tag = "status", rename_all = "camelCase")] +pub enum UpdateStatus { + Processing(Processing), + Enqueued(Enqueued), + Processed(Processed), + Aborted(Aborted), + 
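+ // Failed keeps the full Processing state plus the ResponseError that stopped it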
Failed(Failed), +} + +impl UpdateStatus { + pub fn id(&self) -> u64 { + match self { + UpdateStatus::Processing(u) => u.id(), + UpdateStatus::Enqueued(u) => u.id(), + UpdateStatus::Processed(u) => u.id(), + UpdateStatus::Aborted(u) => u.id(), + UpdateStatus::Failed(u) => u.id(), + } + } + + pub fn meta(&self) -> &UpdateMeta { + match self { + UpdateStatus::Processing(u) => u.meta(), + UpdateStatus::Enqueued(u) => u.meta(), + UpdateStatus::Processed(u) => u.meta(), + UpdateStatus::Aborted(u) => u.meta(), + UpdateStatus::Failed(u) => u.meta(), + } + } + + pub fn processed(&self) -> Option<&Processed> { + match self { + UpdateStatus::Processed(p) => Some(p), + _ => None, + } + } +} diff --git a/dump/src/reader/v3/errors.rs b/dump/src/reader/v3/errors.rs new file mode 100644 index 000000000..40c4d2c8d --- /dev/null +++ b/dump/src/reader/v3/errors.rs @@ -0,0 +1,51 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize, Debug, Clone, Copy)] +pub enum Code { + // index related error + CreateIndex, + IndexAlreadyExists, + IndexNotFound, + InvalidIndexUid, + + // invalid state error + InvalidState, + MissingPrimaryKey, + PrimaryKeyAlreadyPresent, + + MaxFieldsLimitExceeded, + MissingDocumentId, + InvalidDocumentId, + + Filter, + Sort, + + BadParameter, + BadRequest, + DatabaseSizeLimitReached, + DocumentNotFound, + Internal, + InvalidGeoField, + InvalidRankingRule, + InvalidStore, + InvalidToken, + MissingAuthorizationHeader, + NoSpaceLeftOnDevice, + DumpNotFound, + TaskNotFound, + PayloadTooLarge, + RetrieveDocument, + SearchDocuments, + UnsupportedMediaType, + + DumpAlreadyInProgress, + DumpProcessFailed, + + InvalidContentType, + MissingContentType, + MalformedPayload, + MissingPayload, + + MalformedDump, + UnretrievableErrorCode, +} diff --git a/dump/src/reader/v3/meta.rs b/dump/src/reader/v3/meta.rs new file mode 100644 index 000000000..f83762914 --- /dev/null +++ b/dump/src/reader/v3/meta.rs @@ -0,0 +1,18 @@ +use serde::Deserialize; +use uuid::Uuid; + +use super::Settings; + +#[derive(Deserialize, Debug, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUuid { + pub uid: String, + pub uuid: Uuid, +} + +#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DumpMeta { + pub settings: Settings, + pub primary_key: Option, +} diff --git a/dump/src/reader/v3/mod.rs b/dump/src/reader/v3/mod.rs new file mode 100644 index 000000000..d363a76f1 --- /dev/null +++ b/dump/src/reader/v3/mod.rs @@ -0,0 +1,326 @@ +//! ```text +//! . +//! ├── indexes +//! │   ├── 01d7dd17-8241-4f1f-a7d1-2d1cb255f5b0 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── 78be64a3-cae1-449e-b7ed-13e77c9a8a0c +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── ba553439-18fe-4733-ba53-44eed898280c +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   └── c408bc22-5859-49d1-8e9f-c88e2fa95cb0 +//! │   ├── documents.jsonl +//! │   └── meta.json +//! ├── index_uuids +//! │   └── data.jsonl +//! ├── metadata.json +//! └── updates +//! ├── data.jsonl +//! └── updates_files +//! └── 66d3f12d-fcf3-4b53-88cb-407017373de7 +//! 
``` + +use std::fs::{self, File}; +use std::io::{BufRead, BufReader}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use time::OffsetDateTime; + +pub mod errors; +pub mod meta; +pub mod settings; +pub mod updates; + +use self::meta::{DumpMeta, IndexUuid}; +use super::compat::v3_to_v4::CompatV3ToV4; +use super::Document; +use crate::{Error, IndexMetadata, Result, Version}; + +pub type Settings = settings::Settings; +pub type Checked = settings::Checked; +pub type Unchecked = settings::Unchecked; + +pub type Task = updates::UpdateEntry; + +// ===== Other types to clarify the code of the compat module +// everything related to the tasks +pub type Status = updates::UpdateStatus; +pub type Kind = updates::Update; + +// everything related to the settings +pub type Setting = settings::Setting; + +// everything related to the errors +pub type Code = errors::Code; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + db_version: String, + index_db_size: usize, + update_db_size: usize, + #[serde(with = "time::serde::rfc3339")] + dump_date: OffsetDateTime, +} + +pub struct V3Reader { + dump: TempDir, + metadata: Metadata, + tasks: BufReader, + index_uuid: Vec, +} + +impl V3Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; + let index_uuid = BufReader::new(index_uuid); + let index_uuid = index_uuid + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }) + .collect::>>()?; + + Ok(V3Reader { + metadata, + tasks: BufReader::new(File::open(dump.path().join("updates").join("data.jsonl"))?), + index_uuid, + dump, + }) + } + + pub fn index_uuid(&self) -> Vec { + self.index_uuid.clone() + } + + pub fn to_v4(self) -> CompatV3ToV4 { + CompatV3ToV4::new(self) + } + + pub fn version(&self) -> Version { + Version::V3 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.index_uuid.iter().map(|index| -> Result<_> { + V3IndexReader::new( + index.uid.clone(), + &self.dump.path().join("indexes").join(index.uuid.to_string()), + ) + })) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?)?; + if !task.is_finished() { + if let Some(uuid) = task.get_content_uuid() { + let update_file_path = self + .dump + .path() + .join("updates") + .join("updates_files") + .join(uuid.to_string()); + Ok(( + task, + Some( + Box::new(UpdateFile::new(&update_file_path)?) as Box + ), + )) + } else { + Ok((task, None)) + } + } else { + Ok((task, None)) + } + })) + } +} + +pub struct V3IndexReader { + metadata: IndexMetadata, + settings: Settings, + + documents: BufReader, +} + +impl V3IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let meta = File::open(path.join("meta.json"))?; + let meta: DumpMeta = serde_json::from_reader(meta)?; + + let metadata = IndexMetadata { + uid: name, + primary_key: meta.primary_key, + // FIXME: Iterate over the whole task queue to find the creation and last update date. 
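// Until that iteration is implemented, both timestamps below fall back to the
// moment the dump is read (`OffsetDateTime::now_utc()`), so indexes imported
// from a v3 dump do not keep their original creation/update dates.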
+ created_at: OffsetDateTime::now_utc(), + updated_at: OffsetDateTime::now_utc(), + }; + + let ret = V3IndexReader { + metadata, + settings: meta.settings.check(), + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) + } + + pub fn settings(&mut self) -> Result> { + Ok(self.settings.clone()) + } +} + +pub struct UpdateFile { + reader: BufReader, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + Ok(UpdateFile { reader: BufReader::new(File::open(path)?) }) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + (&mut self.reader) + .lines() + .map(|line| { + line.map_err(Error::from) + .and_then(|line| serde_json::from_str(&line).map_err(Error::from)) + }) + .next() + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn read_dump_v3() { + let dump = File::open("tests/assets/v3.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = V3Reader::open(dir).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-07 11:39:03.709153554 +00:00:00"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"63086d59c3f2074e4ab3fff7e8cc36c1"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies2 = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"f309b009608cc0b770b2f74516f92647"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), 
@"95dff22ba3a7019616c12df9daa35e1e"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d153b5a81d8b3cdcbe1dec270b574022"); + + // movies2 + insta::assert_json_snapshot!(movies2.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies_2", + "primaryKey": null, + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies2.settings()), @"1dafc4b123e3a8e14a889719cc01f6e5"); + let documents = movies2.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 0); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"1dafc4b123e3a8e14a889719cc01f6e5"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v3/settings.rs b/dump/src/reader/v3/settings.rs new file mode 100644 index 000000000..0027bf4ff --- /dev/null +++ b/dump/src/reader/v3/settings.rs @@ -0,0 +1,233 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::marker::PhantomData; +use std::num::NonZeroUsize; + +use serde::{Deserialize, Deserializer}; + +#[cfg(test)] +fn serialize_with_wildcard( + field: &Setting>, + s: S, +) -> std::result::Result +where + S: serde::Serializer, +{ + use serde::Serialize; + + let wildcard = vec!["*".to_string()]; + match field { + Setting::Set(value) => Some(value), + Setting::Reset => Some(&wildcard), + Setting::NotSet => None, + } + .serialize(s) +} + +#[derive(Clone, Default, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Checked; + +#[derive(Clone, Default, Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Unchecked; + +/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings +/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a +/// call to `check` will return a `Settings` from a `Settings`. 
+#[derive(Debug, Clone, Default, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))] +pub struct Settings { + #[serde( + default, + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Setting::is_not_set" + )] + pub displayed_attributes: Setting>, + + #[serde( + default, + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Setting::is_not_set" + )] + pub searchable_attributes: Setting>, + + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub filterable_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub sortable_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub ranking_rules: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub stop_words: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub synonyms: Setting>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub distinct_attribute: Setting, + + #[serde(skip)] + pub _kind: PhantomData, +} + +impl Settings { + pub fn cleared() -> Settings { + Settings { + displayed_attributes: Setting::Reset, + searchable_attributes: Setting::Reset, + filterable_attributes: Setting::Reset, + sortable_attributes: Setting::Reset, + ranking_rules: Setting::Reset, + stop_words: Setting::Reset, + synonyms: Setting::Reset, + distinct_attribute: Setting::Reset, + _kind: PhantomData, + } + } + + pub fn into_unchecked(self) -> Settings { + let Self { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + .. 
+ } = self; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + _kind: PhantomData, + } + } +} + +impl Settings { + pub fn check(self) -> Settings { + let displayed_attributes = match self.displayed_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + let searchable_attributes = match self.searchable_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes: self.filterable_attributes, + sortable_attributes: self.sortable_attributes, + ranking_rules: self.ranking_rules, + stop_words: self.stop_words, + synonyms: self.synonyms, + distinct_attribute: self.distinct_attribute, + _kind: PhantomData, + } + } +} + +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct Facets { + pub level_group_size: Option, + pub min_level_size: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Setting { + Set(T), + Reset, + NotSet, +} + +impl Default for Setting { + fn default() -> Self { + Self::NotSet + } +} + +impl Setting { + pub fn map(self, f: F) -> Setting + where + F: FnOnce(T) -> U, + { + match self { + Setting::Set(t) => Setting::Set(f(t)), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + } + } + + pub fn set(self) -> Option { + match self { + Self::Set(value) => Some(value), + _ => None, + } + } + + pub const fn as_ref(&self) -> Setting<&T> { + match *self { + Self::Set(ref value) => Setting::Set(value), + Self::Reset => Setting::Reset, + Self::NotSet => Setting::NotSet, + } + } + + pub const fn is_not_set(&self) -> bool { + matches!(self, Self::NotSet) + } +} + +#[cfg(test)] +impl serde::Serialize for Setting { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + match self { + Self::Set(value) => Some(value), + // Usually not_set isn't serialized by setting skip_serializing_if field attribute + Self::NotSet | Self::Reset => None, + } + .serialize(serializer) + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + Deserialize::deserialize(deserializer).map(|x| match x { + Some(x) => Self::Set(x), + None => Self::Reset, // Reset is forced by sending null value + }) + } +} diff --git a/dump/src/reader/v3/updates.rs b/dump/src/reader/v3/updates.rs new file mode 100644 index 000000000..2f9e49c1a --- /dev/null +++ b/dump/src/reader/v3/updates.rs @@ -0,0 +1,227 @@ +use std::fmt::Display; + +use serde::Deserialize; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::{Code, Settings, Unchecked}; + +#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct UpdateEntry { + pub uuid: Uuid, + pub update: UpdateStatus, +} + +impl UpdateEntry { + pub fn is_finished(&self) -> bool { + match self.update { + UpdateStatus::Processed(_) | UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) => true, + UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false, + } + } + + pub fn get_content_uuid(&self) -> Option<&Uuid> { + match self.update.meta() { 
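// Only document additions carry a payload file: their `content_uuid` points at
// an entry in `updates_files/`. Deletions, settings updates and clear
// operations are self-contained, so the arm below returns `None` for them.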
+ Update::DocumentAddition { content_uuid, .. } => Some(content_uuid), + Update::DeleteDocuments(_) | Update::Settings(_) | Update::ClearDocuments => None, + } + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(tag = "status", rename_all = "camelCase")] +pub enum UpdateStatus { + Processing(Processing), + Enqueued(Enqueued), + Processed(Processed), + Aborted(Aborted), + Failed(Failed), +} + +impl UpdateStatus { + pub fn id(&self) -> u64 { + match self { + UpdateStatus::Processing(u) => u.id(), + UpdateStatus::Enqueued(u) => u.id(), + UpdateStatus::Processed(u) => u.id(), + UpdateStatus::Aborted(u) => u.id(), + UpdateStatus::Failed(u) => u.id(), + } + } + + pub fn meta(&self) -> &Update { + match self { + UpdateStatus::Processing(u) => u.meta(), + UpdateStatus::Enqueued(u) => u.meta(), + UpdateStatus::Processed(u) => u.meta(), + UpdateStatus::Aborted(u) => u.meta(), + UpdateStatus::Failed(u) => u.meta(), + } + } + + pub fn is_finished(&self) -> bool { + match self { + UpdateStatus::Processing(_) | UpdateStatus::Enqueued(_) => false, + UpdateStatus::Aborted(_) | UpdateStatus::Failed(_) | UpdateStatus::Processed(_) => true, + } + } + + pub fn processed(&self) -> Option<&Processed> { + match self { + UpdateStatus::Processed(p) => Some(p), + _ => None, + } + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Enqueued { + pub update_id: u64, + pub meta: Update, + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, +} + +impl Enqueued { + pub fn meta(&self) -> &Update { + &self.meta + } + + pub fn id(&self) -> u64 { + self.update_id + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Processed { + pub success: UpdateResult, + #[serde(with = "time::serde::rfc3339")] + pub processed_at: OffsetDateTime, + #[serde(flatten)] + pub from: Processing, +} + +impl Processed { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &Update { + self.from.meta() + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Processing { + #[serde(flatten)] + pub from: Enqueued, + #[serde(with = "time::serde::rfc3339")] + pub started_processing_at: OffsetDateTime, +} + +impl Processing { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &Update { + self.from.meta() + } +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Aborted { + #[serde(flatten)] + pub from: Enqueued, + #[serde(with = "time::serde::rfc3339")] + pub aborted_at: OffsetDateTime, +} + +impl Aborted { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &Update { + self.from.meta() + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub struct Failed { + #[serde(flatten)] + pub from: Processing, + pub msg: String, + pub code: Code, + #[serde(with = "time::serde::rfc3339")] + pub failed_at: OffsetDateTime, +} + +impl Display for Failed { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.msg.fmt(f) + } +} + +impl Failed { + pub fn id(&self) -> u64 { + self.from.id() + } + + pub fn meta(&self) -> &Update { + self.from.meta() + } +} + +#[allow(clippy::large_enum_variant)] +#[derive(Debug, 
Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum Update { + DeleteDocuments(Vec), + DocumentAddition { + primary_key: Option, + method: IndexDocumentsMethod, + content_uuid: Uuid, + }, + Settings(Settings), + ClearDocuments, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[non_exhaustive] +pub enum IndexDocumentsMethod { + /// Replace the previous document with the new one, + /// removing all the already known attributes. + ReplaceDocuments, + + /// Merge the previous version of the document with the new version, + /// replacing old attributes values with the new ones and add the new attributes. + UpdateDocuments, +} + +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum UpdateResult { + DocumentsAddition(DocumentAdditionResult), + DocumentDeletion { deleted: u64 }, + Other, +} + +#[derive(Debug, Deserialize, Clone)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DocumentAdditionResult { + pub nb_documents: usize, +} diff --git a/dump/src/reader/v4/errors.rs b/dump/src/reader/v4/errors.rs new file mode 100644 index 000000000..5a9a8d5df --- /dev/null +++ b/dump/src/reader/v4/errors.rs @@ -0,0 +1,311 @@ +use std::fmt; + +use http::StatusCode; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +#[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))] +pub struct ResponseError { + #[serde(skip)] + #[cfg_attr(feature = "test-traits", proptest(strategy = "strategy::status_code_strategy()"))] + pub code: StatusCode, + pub message: String, + #[serde(rename = "code")] + pub error_code: String, + #[serde(rename = "type")] + pub error_type: String, + #[serde(rename = "link")] + pub error_link: String, +} + +impl ResponseError { + pub fn from_msg(message: String, code: Code) -> Self { + Self { + code: code.http(), + message, + error_code: code.err_code().error_name.to_string(), + error_type: code.type_(), + error_link: code.url(), + } + } +} + +impl fmt::Display for ResponseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.message.fmt(f) + } +} + +impl std::error::Error for ResponseError {} + +impl From for ResponseError +where + T: ErrorCode, +{ + fn from(other: T) -> Self { + Self { + code: other.http_status(), + message: other.to_string(), + error_code: other.error_name(), + error_type: other.error_type(), + error_link: other.error_url(), + } + } +} + +pub trait ErrorCode: std::error::Error { + fn error_code(&self) -> Code; + + /// returns the HTTP status code ascociated with the error + fn http_status(&self) -> StatusCode { + self.error_code().http() + } + + /// returns the doc url ascociated with the error + fn error_url(&self) -> String { + self.error_code().url() + } + + /// returns error name, used as error code + fn error_name(&self) -> String { + self.error_code().name() + } + + /// return the error type + fn error_type(&self) -> String { + self.error_code().type_() + } +} + +#[allow(clippy::enum_variant_names)] +enum ErrorType { + InternalError, + InvalidRequestError, + AuthenticationError, +} + +impl fmt::Display for ErrorType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use ErrorType::*; + + match self { + InternalError => write!(f, "internal"), + InvalidRequestError => write!(f, "invalid_request"), + AuthenticationError => write!(f, "auth"), + } + } +} + +#[derive(Serialize, Deserialize, Debug, 
Clone, Copy)] +pub enum Code { + // index related error + CreateIndex, + IndexAlreadyExists, + IndexNotFound, + InvalidIndexUid, + InvalidMinWordLengthForTypo, + + // invalid state error + InvalidState, + MissingPrimaryKey, + PrimaryKeyAlreadyPresent, + + MaxFieldsLimitExceeded, + MissingDocumentId, + InvalidDocumentId, + + Filter, + Sort, + + BadParameter, + BadRequest, + DatabaseSizeLimitReached, + DocumentNotFound, + Internal, + InvalidGeoField, + InvalidRankingRule, + InvalidStore, + InvalidToken, + MissingAuthorizationHeader, + NoSpaceLeftOnDevice, + DumpNotFound, + TaskNotFound, + PayloadTooLarge, + RetrieveDocument, + SearchDocuments, + UnsupportedMediaType, + + DumpAlreadyInProgress, + DumpProcessFailed, + + InvalidContentType, + MissingContentType, + MalformedPayload, + MissingPayload, + + ApiKeyNotFound, + MissingParameter, + InvalidApiKeyActions, + InvalidApiKeyIndexes, + InvalidApiKeyExpiresAt, + InvalidApiKeyDescription, + + UnretrievableErrorCode, + MalformedDump, +} + +impl Code { + /// ascociate a `Code` variant to the actual ErrCode + fn err_code(&self) -> ErrCode { + use Code::*; + + match self { + // index related errors + // create index is thrown on internal error while creating an index. + CreateIndex => { + ErrCode::internal("index_creation_failed", StatusCode::INTERNAL_SERVER_ERROR) + } + IndexAlreadyExists => ErrCode::invalid("index_already_exists", StatusCode::CONFLICT), + // thrown when requesting an unexisting index + IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND), + InvalidIndexUid => ErrCode::invalid("invalid_index_uid", StatusCode::BAD_REQUEST), + + // invalid state error + InvalidState => ErrCode::internal("invalid_state", StatusCode::INTERNAL_SERVER_ERROR), + // thrown when no primary key has been set + MissingPrimaryKey => { + ErrCode::invalid("primary_key_inference_failed", StatusCode::BAD_REQUEST) + } + // error thrown when trying to set an already existing primary key + PrimaryKeyAlreadyPresent => { + ErrCode::invalid("index_primary_key_already_exists", StatusCode::BAD_REQUEST) + } + // invalid ranking rule + InvalidRankingRule => ErrCode::invalid("invalid_ranking_rule", StatusCode::BAD_REQUEST), + + // invalid database + InvalidStore => { + ErrCode::internal("invalid_store_file", StatusCode::INTERNAL_SERVER_ERROR) + } + + // invalid document + MaxFieldsLimitExceeded => { + ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST) + } + MissingDocumentId => ErrCode::invalid("missing_document_id", StatusCode::BAD_REQUEST), + InvalidDocumentId => ErrCode::invalid("invalid_document_id", StatusCode::BAD_REQUEST), + + // error related to filters + Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST), + // error related to sorts + Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST), + + BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST), + BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST), + DatabaseSizeLimitReached => { + ErrCode::internal("database_size_limit_reached", StatusCode::INTERNAL_SERVER_ERROR) + } + DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND), + Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR), + InvalidGeoField => ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST), + InvalidToken => ErrCode::authentication("invalid_api_key", StatusCode::FORBIDDEN), + MissingAuthorizationHeader => { + ErrCode::authentication("missing_authorization_header", 
StatusCode::UNAUTHORIZED) + } + TaskNotFound => ErrCode::invalid("task_not_found", StatusCode::NOT_FOUND), + DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND), + NoSpaceLeftOnDevice => { + ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR) + } + PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE), + RetrieveDocument => { + ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST) + } + SearchDocuments => ErrCode::internal("search_error", StatusCode::BAD_REQUEST), + UnsupportedMediaType => { + ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + + // error related to dump + DumpAlreadyInProgress => { + ErrCode::invalid("dump_already_processing", StatusCode::CONFLICT) + } + DumpProcessFailed => { + ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR) + } + MissingContentType => { + ErrCode::invalid("missing_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + MalformedPayload => ErrCode::invalid("malformed_payload", StatusCode::BAD_REQUEST), + InvalidContentType => { + ErrCode::invalid("invalid_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + MissingPayload => ErrCode::invalid("missing_payload", StatusCode::BAD_REQUEST), + + // error related to keys + ApiKeyNotFound => ErrCode::invalid("api_key_not_found", StatusCode::NOT_FOUND), + MissingParameter => ErrCode::invalid("missing_parameter", StatusCode::BAD_REQUEST), + InvalidApiKeyActions => { + ErrCode::invalid("invalid_api_key_actions", StatusCode::BAD_REQUEST) + } + InvalidApiKeyIndexes => { + ErrCode::invalid("invalid_api_key_indexes", StatusCode::BAD_REQUEST) + } + InvalidApiKeyExpiresAt => { + ErrCode::invalid("invalid_api_key_expires_at", StatusCode::BAD_REQUEST) + } + InvalidApiKeyDescription => { + ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST) + } + InvalidMinWordLengthForTypo => { + ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST) + } + UnretrievableErrorCode => { + ErrCode::invalid("unretrievable_error_code", StatusCode::BAD_REQUEST) + } + MalformedDump => ErrCode::invalid("malformed_dump", StatusCode::BAD_REQUEST), + } + } + + /// return the HTTP status code ascociated with the `Code` + fn http(&self) -> StatusCode { + self.err_code().status_code + } + + /// return error name, used as error code + fn name(&self) -> String { + self.err_code().error_name.to_string() + } + + /// return the error type + fn type_(&self) -> String { + self.err_code().error_type.to_string() + } + + /// return the doc url ascociated with the error + fn url(&self) -> String { + format!("https://docs.meilisearch.com/errors#{}", self.name()) + } +} + +/// Internal structure providing a convenient way to create error codes +struct ErrCode { + status_code: StatusCode, + error_type: ErrorType, + error_name: &'static str, +} + +impl ErrCode { + fn authentication(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::AuthenticationError } + } + + fn internal(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::InternalError } + } + + fn invalid(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::InvalidRequestError } + } +} diff --git a/dump/src/reader/v4/keys.rs b/dump/src/reader/v4/keys.rs new file mode 100644 index 
000000000..26e5cad7d --- /dev/null +++ b/dump/src/reader/v4/keys.rs @@ -0,0 +1,77 @@ +use serde::Deserialize; +use time::OffsetDateTime; + +pub const KEY_ID_LENGTH: usize = 8; +pub type KeyId = [u8; KEY_ID_LENGTH]; + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Key { + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + pub id: KeyId, + pub actions: Vec, + pub indexes: Vec, + #[serde(with = "time::serde::rfc3339::option")] + pub expires_at: Option, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, +} + +#[derive(Copy, Clone, Deserialize, Debug, Eq, PartialEq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[repr(u8)] +pub enum Action { + #[serde(rename = "*")] + All = 0, + #[serde(rename = "search")] + Search = actions::SEARCH, + #[serde(rename = "documents.add")] + DocumentsAdd = actions::DOCUMENTS_ADD, + #[serde(rename = "documents.get")] + DocumentsGet = actions::DOCUMENTS_GET, + #[serde(rename = "documents.delete")] + DocumentsDelete = actions::DOCUMENTS_DELETE, + #[serde(rename = "indexes.create")] + IndexesAdd = actions::INDEXES_CREATE, + #[serde(rename = "indexes.get")] + IndexesGet = actions::INDEXES_GET, + #[serde(rename = "indexes.update")] + IndexesUpdate = actions::INDEXES_UPDATE, + #[serde(rename = "indexes.delete")] + IndexesDelete = actions::INDEXES_DELETE, + #[serde(rename = "tasks.get")] + TasksGet = actions::TASKS_GET, + #[serde(rename = "settings.get")] + SettingsGet = actions::SETTINGS_GET, + #[serde(rename = "settings.update")] + SettingsUpdate = actions::SETTINGS_UPDATE, + #[serde(rename = "stats.get")] + StatsGet = actions::STATS_GET, + #[serde(rename = "dumps.create")] + DumpsCreate = actions::DUMPS_CREATE, + #[serde(rename = "dumps.get")] + DumpsGet = actions::DUMPS_GET, + #[serde(rename = "version")] + Version = actions::VERSION, +} + +pub mod actions { + pub const SEARCH: u8 = 1; + pub const DOCUMENTS_ADD: u8 = 2; + pub const DOCUMENTS_GET: u8 = 3; + pub const DOCUMENTS_DELETE: u8 = 4; + pub const INDEXES_CREATE: u8 = 5; + pub const INDEXES_GET: u8 = 6; + pub const INDEXES_UPDATE: u8 = 7; + pub const INDEXES_DELETE: u8 = 8; + pub const TASKS_GET: u8 = 9; + pub const SETTINGS_GET: u8 = 10; + pub const SETTINGS_UPDATE: u8 = 11; + pub const STATS_GET: u8 = 12; + pub const DUMPS_CREATE: u8 = 13; + pub const DUMPS_GET: u8 = 14; + pub const VERSION: u8 = 15; +} diff --git a/dump/src/reader/v4/meta.rs b/dump/src/reader/v4/meta.rs new file mode 100644 index 000000000..cec05f57c --- /dev/null +++ b/dump/src/reader/v4/meta.rs @@ -0,0 +1,139 @@ +use std::fmt::{self, Display, Formatter}; +use std::marker::PhantomData; +use std::str::FromStr; + +use serde::de::Visitor; +use serde::{Deserialize, Deserializer}; +use uuid::Uuid; + +use super::settings::{Settings, Unchecked}; + +#[derive(Deserialize, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUuid { + pub uid: String, + pub index_meta: IndexMeta, +} + +#[derive(Deserialize, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexMeta { + pub uuid: Uuid, + pub creation_task_id: usize, +} + +// There is one in each indexes under `meta.json`. 
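// Given the derive below (no serde rename is visible here), such a file is
// assumed to deserialize from something shaped roughly like
//   { "settings": { ... }, "primary_key": "id" }
// i.e. the index settings together with its optional primary key.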
+#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DumpMeta { + pub settings: Settings, + pub primary_key: Option, +} + +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUid(pub String); + +impl TryFrom for IndexUid { + type Error = IndexUidFormatError; + + fn try_from(uid: String) -> Result { + if !uid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') + || uid.is_empty() + || uid.len() > 400 + { + Err(IndexUidFormatError { invalid_uid: uid }) + } else { + Ok(IndexUid(uid)) + } + } +} + +impl FromStr for IndexUid { + type Err = IndexUidFormatError; + + fn from_str(uid: &str) -> Result { + uid.to_string().try_into() + } +} + +impl From for String { + fn from(uid: IndexUid) -> Self { + uid.into_inner() + } +} + +#[derive(Debug)] +pub struct IndexUidFormatError { + pub invalid_uid: String, +} + +impl Display for IndexUidFormatError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "invalid index uid `{}`, the uid must be an integer \ + or a string containing only alphanumeric characters \ + a-z A-Z 0-9, hyphens - and underscores _.", + self.invalid_uid, + ) + } +} + +impl std::error::Error for IndexUidFormatError {} + +/// A type that tries to match either a star (*) or +/// any other thing that implements `FromStr`. +#[derive(Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum StarOr { + Star, + Other(T), +} + +impl<'de, T, E> Deserialize<'de> for StarOr +where + T: FromStr, + E: Display, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + /// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag. + /// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to + /// deserialize everything as a `StarOr::Other`, including "*". + /// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is + /// not supported on untagged enums. 
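/// In practice (see `visit_str` below), the literal string `"*"` becomes
/// `StarOr::Star`, while any other input is handed to `T::from_str`, e.g.
/// `"movies"` becomes `StarOr::Other(IndexUid("movies".to_string()))` when
/// `T` is `IndexUid`.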
+ struct StarOrVisitor(PhantomData); + + impl<'de, T, FE> Visitor<'de> for StarOrVisitor + where + T: FromStr, + FE: Display, + { + type Value = StarOr; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("a string") + } + + fn visit_str(self, v: &str) -> Result + where + SE: serde::de::Error, + { + match v { + "*" => Ok(StarOr::Star), + v => { + let other = FromStr::from_str(v).map_err(|e: T::Err| { + SE::custom(format!("Invalid `other` value: {}", e)) + })?; + Ok(StarOr::Other(other)) + } + } + } + } + + deserializer.deserialize_str(StarOrVisitor(PhantomData)) + } +} diff --git a/dump/src/reader/v4/mod.rs b/dump/src/reader/v4/mod.rs new file mode 100644 index 000000000..3aad71ddb --- /dev/null +++ b/dump/src/reader/v4/mod.rs @@ -0,0 +1,307 @@ +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, ErrorKind}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use time::OffsetDateTime; +use uuid::Uuid; + +pub mod errors; +pub mod keys; +pub mod meta; +pub mod settings; +pub mod tasks; + +use self::meta::{DumpMeta, IndexUuid}; +use super::compat::v4_to_v5::CompatV4ToV5; +use crate::{Error, IndexMetadata, Result, Version}; + +pub type Document = serde_json::Map; +pub type Settings = settings::Settings; +pub type Checked = settings::Checked; +pub type Unchecked = settings::Unchecked; + +pub type Task = tasks::Task; +pub type Key = keys::Key; + +// everything related to the settings +pub type Setting = settings::Setting; + +// everything related to the api keys +pub type Action = keys::Action; + +// everything related to the errors +pub type ResponseError = errors::ResponseError; +pub type Code = errors::Code; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + db_version: String, + index_db_size: usize, + update_db_size: usize, + #[serde(with = "time::serde::rfc3339")] + dump_date: OffsetDateTime, +} + +pub struct V4Reader { + dump: TempDir, + metadata: Metadata, + tasks: BufReader, + keys: BufReader, + index_uuid: Vec, +} + +impl V4Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; + let index_uuid = BufReader::new(index_uuid); + let index_uuid = index_uuid + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
}) + .collect::>>()?; + + Ok(V4Reader { + metadata, + tasks: BufReader::new( + File::open(dump.path().join("updates").join("data.jsonl")).unwrap(), + ), + keys: BufReader::new(File::open(dump.path().join("keys"))?), + index_uuid, + dump, + }) + } + + pub fn to_v5(self) -> CompatV4ToV5 { + CompatV4ToV5::new(self) + } + + pub fn version(&self) -> Version { + Version::V4 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn instance_uid(&self) -> Result> { + match fs::read_to_string(self.dump.path().join("instance-uid")) { + Ok(uuid) => Ok(Some(Uuid::parse_str(&uuid)?)), + Err(e) if e.kind() == ErrorKind::NotFound => Ok(None), + Err(e) => Err(e.into()), + } + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.index_uuid.iter().map(|index| -> Result<_> { + V4IndexReader::new( + index.uid.clone(), + &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), + ) + })) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?)?; + if !task.is_finished() { + if let Some(uuid) = task.get_content_uuid() { + let update_file_path = self + .dump + .path() + .join("updates") + .join("updates_files") + .join(uuid.to_string()); + Ok(( + task, + Some( + Box::new(UpdateFile::new(&update_file_path)?) as Box + ), + )) + } else { + Ok((task, None)) + } + } else { + Ok((task, None)) + } + })) + } + + pub fn keys(&mut self) -> Box> + '_> { + Box::new( + (&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), + ) + } +} + +pub struct V4IndexReader { + metadata: IndexMetadata, + settings: Settings, + + documents: BufReader, +} + +impl V4IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let meta = File::open(path.join("meta.json"))?; + let meta: DumpMeta = serde_json::from_reader(meta)?; + + let metadata = IndexMetadata { + uid: name, + primary_key: meta.primary_key, + // FIXME: Iterate over the whole task queue to find the creation and last update date. + created_at: OffsetDateTime::now_utc(), + updated_at: OffsetDateTime::now_utc(), + }; + + let ret = V4IndexReader { + metadata, + settings: meta.settings.check(), + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) + } + + pub fn settings(&mut self) -> Result> { + Ok(self.settings.clone()) + } +} + +pub struct UpdateFile { + reader: BufReader, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + Ok(UpdateFile { reader: BufReader::new(File::open(path)?) 
}) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + (&mut self.reader) + .lines() + .map(|line| { + line.map_err(Error::from) + .and_then(|line| serde_json::from_str(&line).map_err(Error::from)) + }) + .next() + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn read_dump_v4() { + let dump = File::open("tests/assets/v4.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = V4Reader::open(dir).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-06 12:53:49.131989609 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"f4efacbea0c1a4400873f4b2ee33f975"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_some()); // the enqueued document addition + assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(0).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // keys + let keys = dump.keys().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys, { "[].uid" => "[uuid]" }), @"9240300dca8f962cdf58359ef4c76f09"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"65b139c6b9fc251e187073c8557803e2"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"06aa1988493485d9b2cda7c751e6bb15"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 110); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"786022a66ecb992c8a2a60fee070a5ab"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + 
meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"7d722fc2629eaa45032ed3deb0c9b4ce"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v4/settings.rs b/dump/src/reader/v4/settings.rs new file mode 100644 index 000000000..964cd1152 --- /dev/null +++ b/dump/src/reader/v4/settings.rs @@ -0,0 +1,261 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::marker::PhantomData; +use std::num::NonZeroUsize; + +use serde::{Deserialize, Deserializer}; + +#[cfg(test)] +fn serialize_with_wildcard( + field: &Setting>, + s: S, +) -> std::result::Result +where + S: serde::Serializer, +{ + use serde::Serialize; + + let wildcard = vec!["*".to_string()]; + match field { + Setting::Set(value) => Some(value), + Setting::Reset => Some(&wildcard), + Setting::NotSet => None, + } + .serialize(s) +} + +#[derive(Clone, Default, Debug, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Checked; + +#[derive(Clone, Default, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Unchecked; + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct MinWordSizeTyposSetting { + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub one_typo: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub two_typos: Setting, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct TypoSettings { + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub enabled: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub min_word_size_for_typos: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub disable_on_words: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub disable_on_attributes: Setting>, +} +/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings +/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a +/// call to `check` will return a `Settings` from a `Settings`. 
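///
/// Every field is wrapped in the `Setting` type defined further down: a present
/// value maps to `Setting::Set`, an explicit `null` maps to `Setting::Reset`,
/// and an absent field maps to `Setting::NotSet` (via `#[serde(default)]`).
/// Compared to the v3 settings, this version also carries a `typoTolerance`
/// object (see `TypoSettings` above).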
+#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: serde::Serialize", deserialize = "T: Deserialize<'static>"))] +pub struct Settings { + #[serde( + default, + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Setting::is_not_set" + )] + pub displayed_attributes: Setting>, + + #[serde( + default, + serialize_with = "serialize_with_wildcard", + skip_serializing_if = "Setting::is_not_set" + )] + pub searchable_attributes: Setting>, + + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub filterable_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub sortable_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub ranking_rules: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub stop_words: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub synonyms: Setting>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub distinct_attribute: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub typo_tolerance: Setting, + + #[serde(skip)] + pub _kind: PhantomData, +} + +impl Settings { + pub fn cleared() -> Settings { + Settings { + displayed_attributes: Setting::Reset, + searchable_attributes: Setting::Reset, + filterable_attributes: Setting::Reset, + sortable_attributes: Setting::Reset, + ranking_rules: Setting::Reset, + stop_words: Setting::Reset, + synonyms: Setting::Reset, + distinct_attribute: Setting::Reset, + typo_tolerance: Setting::Reset, + _kind: PhantomData, + } + } + + pub fn into_unchecked(self) -> Settings { + let Self { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + .. 
+ } = self; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + _kind: PhantomData, + } + } +} + +impl Settings { + pub fn check(self) -> Settings { + let displayed_attributes = match self.displayed_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + let searchable_attributes = match self.searchable_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes: self.filterable_attributes, + sortable_attributes: self.sortable_attributes, + ranking_rules: self.ranking_rules, + stop_words: self.stop_words, + synonyms: self.synonyms, + distinct_attribute: self.distinct_attribute, + typo_tolerance: self.typo_tolerance, + _kind: PhantomData, + } + } +} + +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct Facets { + pub level_group_size: Option, + pub min_level_size: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +pub enum Setting { + Set(T), + Reset, + NotSet, +} + +impl Default for Setting { + fn default() -> Self { + Self::NotSet + } +} + +impl Setting { + pub fn set(self) -> Option { + match self { + Self::Set(value) => Some(value), + _ => None, + } + } + + pub const fn as_ref(&self) -> Setting<&T> { + match *self { + Self::Set(ref value) => Setting::Set(value), + Self::Reset => Setting::Reset, + Self::NotSet => Setting::NotSet, + } + } + + pub const fn is_not_set(&self) -> bool { + matches!(self, Self::NotSet) + } + + /// If `Self` is `Reset`, then map self to `Set` with the provided `val`. 
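/// For example, `Setting::Reset.or_reset(default_rules)` yields
/// `Setting::Set(default_rules)`, while `Setting::Set(_)` and
/// `Setting::NotSet` are returned unchanged.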
+ pub fn or_reset(self, val: T) -> Self { + match self { + Self::Reset => Self::Set(val), + otherwise => otherwise, + } + } +} + +#[cfg(test)] +impl serde::Serialize for Setting { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + match self { + Self::Set(value) => Some(value), + // Usually not_set isn't serialized by setting skip_serializing_if field attribute + Self::NotSet | Self::Reset => None, + } + .serialize(serializer) + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + Deserialize::deserialize(deserializer).map(|x| match x { + Some(x) => Self::Set(x), + None => Self::Reset, // Reset is forced by sending null value + }) + } +} diff --git a/dump/src/reader/v4/tasks.rs b/dump/src/reader/v4/tasks.rs new file mode 100644 index 000000000..e1bdde0c7 --- /dev/null +++ b/dump/src/reader/v4/tasks.rs @@ -0,0 +1,135 @@ +use serde::Deserialize; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::errors::ResponseError; +use super::meta::IndexUid; +use super::settings::{Settings, Unchecked}; + +pub type TaskId = u32; +pub type BatchId = u32; + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Task { + pub id: TaskId, + pub index_uid: IndexUid, + pub content: TaskContent, + pub events: Vec, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[allow(clippy::large_enum_variant)] +pub enum TaskContent { + DocumentAddition { + content_uuid: Uuid, + merge_strategy: IndexDocumentsMethod, + primary_key: Option, + documents_count: usize, + allow_index_creation: bool, + }, + DocumentDeletion(DocumentDeletion), + SettingsUpdate { + settings: Settings, + /// Indicates whether the task was a deletion + is_deletion: bool, + allow_index_creation: bool, + }, + IndexDeletion, + IndexCreation { + primary_key: Option, + }, + IndexUpdate { + primary_key: Option, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum IndexDocumentsMethod { + /// Replace the previous document with the new one, + /// removing all the already known attributes. + ReplaceDocuments, + + /// Merge the previous version of the document with the new version, + /// replacing old attributes values with the new ones and add the new attributes. 
+ UpdateDocuments, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum DocumentDeletion { + Clear, + Ids(Vec), +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum TaskEvent { + Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Batched { + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + batch_id: BatchId, + }, + Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Succeded { + result: TaskResult, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, + Failed { + error: ResponseError, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum TaskResult { + DocumentAddition { indexed_documents: u64 }, + DocumentDeletion { deleted_documents: u64 }, + ClearAll { deleted_documents: u64 }, + Other, +} + +impl Task { + /// Return true when a task is finished. + /// A task is finished when its last state is either `Succeeded` or `Failed`. + pub fn is_finished(&self) -> bool { + self.events.last().map_or(false, |event| { + matches!(event, TaskEvent::Succeded { .. } | TaskEvent::Failed { .. }) + }) + } + + /// Return the content_uuid of the `Task` if there is one. + pub fn get_content_uuid(&self) -> Option { + match self { + Task { content: TaskContent::DocumentAddition { content_uuid, .. }, .. } => { + Some(*content_uuid) + } + _ => None, + } + } +} + +impl IndexUid { + pub fn into_inner(self) -> String { + self.0 + } + + /// Return a reference over the inner str. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::ops::Deref for IndexUid { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/dump/src/reader/v5/errors.rs b/dump/src/reader/v5/errors.rs new file mode 100644 index 000000000..c918c301c --- /dev/null +++ b/dump/src/reader/v5/errors.rs @@ -0,0 +1,272 @@ +use std::fmt; + +use http::StatusCode; +use serde::Deserialize; + +#[derive(Debug, Deserialize, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase")] +#[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct ResponseError { + #[serde(skip)] + code: StatusCode, + + pub message: String, + #[serde(rename = "code")] + pub error_code: String, + #[serde(rename = "type")] + pub error_type: String, + #[serde(rename = "link")] + pub error_link: String, +} + +impl ResponseError { + pub fn from_msg(message: String, code: Code) -> Self { + Self { + code: code.http(), + message, + error_code: code.err_code().error_name.to_string(), + error_type: code.type_(), + error_link: code.url(), + } + } +} + +#[derive(Deserialize, Debug, Clone, Copy)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum Code { + // index related error + CreateIndex, + IndexAlreadyExists, + IndexNotFound, + InvalidIndexUid, + InvalidMinWordLengthForTypo, + + // invalid state error + InvalidState, + MissingPrimaryKey, + PrimaryKeyAlreadyPresent, + + MaxFieldsLimitExceeded, + MissingDocumentId, + InvalidDocumentId, + + Filter, + Sort, + + BadParameter, + BadRequest, + DatabaseSizeLimitReached, + DocumentNotFound, + Internal, + InvalidGeoField, + InvalidRankingRule, + InvalidStore, + InvalidToken, + MissingAuthorizationHeader, + NoSpaceLeftOnDevice, + DumpNotFound, + TaskNotFound, + PayloadTooLarge, + RetrieveDocument, + 
SearchDocuments, + UnsupportedMediaType, + + DumpAlreadyInProgress, + DumpProcessFailed, + + InvalidContentType, + MissingContentType, + MalformedPayload, + MissingPayload, + + ApiKeyNotFound, + MissingParameter, + InvalidApiKeyActions, + InvalidApiKeyIndexes, + InvalidApiKeyExpiresAt, + InvalidApiKeyDescription, + InvalidApiKeyName, + InvalidApiKeyUid, + ImmutableField, + ApiKeyAlreadyExists, + + UnretrievableErrorCode, +} + +impl Code { + /// associate a `Code` variant to the actual ErrCode + fn err_code(&self) -> ErrCode { + use Code::*; + + match self { + // index related errors + // create index is thrown on internal error while creating an index. + CreateIndex => { + ErrCode::internal("index_creation_failed", StatusCode::INTERNAL_SERVER_ERROR) + } + IndexAlreadyExists => ErrCode::invalid("index_already_exists", StatusCode::CONFLICT), + // thrown when requesting an unexisting index + IndexNotFound => ErrCode::invalid("index_not_found", StatusCode::NOT_FOUND), + InvalidIndexUid => ErrCode::invalid("invalid_index_uid", StatusCode::BAD_REQUEST), + + // invalid state error + InvalidState => ErrCode::internal("invalid_state", StatusCode::INTERNAL_SERVER_ERROR), + // thrown when no primary key has been set + MissingPrimaryKey => { + ErrCode::invalid("primary_key_inference_failed", StatusCode::BAD_REQUEST) + } + // error thrown when trying to set an already existing primary key + PrimaryKeyAlreadyPresent => { + ErrCode::invalid("index_primary_key_already_exists", StatusCode::BAD_REQUEST) + } + // invalid ranking rule + InvalidRankingRule => ErrCode::invalid("invalid_ranking_rule", StatusCode::BAD_REQUEST), + + // invalid database + InvalidStore => { + ErrCode::internal("invalid_store_file", StatusCode::INTERNAL_SERVER_ERROR) + } + + // invalid document + MaxFieldsLimitExceeded => { + ErrCode::invalid("max_fields_limit_exceeded", StatusCode::BAD_REQUEST) + } + MissingDocumentId => ErrCode::invalid("missing_document_id", StatusCode::BAD_REQUEST), + InvalidDocumentId => ErrCode::invalid("invalid_document_id", StatusCode::BAD_REQUEST), + + // error related to filters + Filter => ErrCode::invalid("invalid_filter", StatusCode::BAD_REQUEST), + // error related to sorts + Sort => ErrCode::invalid("invalid_sort", StatusCode::BAD_REQUEST), + + BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST), + BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST), + DatabaseSizeLimitReached => { + ErrCode::internal("database_size_limit_reached", StatusCode::INTERNAL_SERVER_ERROR) + } + DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND), + Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR), + InvalidGeoField => ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST), + InvalidToken => ErrCode::authentication("invalid_api_key", StatusCode::FORBIDDEN), + MissingAuthorizationHeader => { + ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED) + } + TaskNotFound => ErrCode::invalid("task_not_found", StatusCode::NOT_FOUND), + DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND), + NoSpaceLeftOnDevice => { + ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR) + } + PayloadTooLarge => ErrCode::invalid("payload_too_large", StatusCode::PAYLOAD_TOO_LARGE), + RetrieveDocument => { + ErrCode::internal("unretrievable_document", StatusCode::BAD_REQUEST) + } + SearchDocuments => ErrCode::internal("search_error", StatusCode::BAD_REQUEST), + 
UnsupportedMediaType => { + ErrCode::invalid("unsupported_media_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + + // error related to dump + DumpAlreadyInProgress => { + ErrCode::invalid("dump_already_processing", StatusCode::CONFLICT) + } + DumpProcessFailed => { + ErrCode::internal("dump_process_failed", StatusCode::INTERNAL_SERVER_ERROR) + } + MissingContentType => { + ErrCode::invalid("missing_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + MalformedPayload => ErrCode::invalid("malformed_payload", StatusCode::BAD_REQUEST), + InvalidContentType => { + ErrCode::invalid("invalid_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) + } + MissingPayload => ErrCode::invalid("missing_payload", StatusCode::BAD_REQUEST), + + // error related to keys + ApiKeyNotFound => ErrCode::invalid("api_key_not_found", StatusCode::NOT_FOUND), + MissingParameter => ErrCode::invalid("missing_parameter", StatusCode::BAD_REQUEST), + InvalidApiKeyActions => { + ErrCode::invalid("invalid_api_key_actions", StatusCode::BAD_REQUEST) + } + InvalidApiKeyIndexes => { + ErrCode::invalid("invalid_api_key_indexes", StatusCode::BAD_REQUEST) + } + InvalidApiKeyExpiresAt => { + ErrCode::invalid("invalid_api_key_expires_at", StatusCode::BAD_REQUEST) + } + InvalidApiKeyDescription => { + ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST) + } + InvalidApiKeyName => ErrCode::invalid("invalid_api_key_name", StatusCode::BAD_REQUEST), + InvalidApiKeyUid => ErrCode::invalid("invalid_api_key_uid", StatusCode::BAD_REQUEST), + ApiKeyAlreadyExists => ErrCode::invalid("api_key_already_exists", StatusCode::CONFLICT), + ImmutableField => ErrCode::invalid("immutable_field", StatusCode::BAD_REQUEST), + InvalidMinWordLengthForTypo => { + ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST) + } + UnretrievableErrorCode => { + ErrCode::invalid("unretrievable_error_code", StatusCode::BAD_REQUEST) + } + } + } + + /// return the HTTP status code associated with the `Code` + fn http(&self) -> StatusCode { + self.err_code().status_code + } + + /// return error name, used as error code + fn name(&self) -> String { + self.err_code().error_name.to_string() + } + + /// return the error type + fn type_(&self) -> String { + self.err_code().error_type.to_string() + } + + /// return the doc url associated with the error + fn url(&self) -> String { + format!("https://docs.meilisearch.com/errors#{}", self.name()) + } +} + +/// Internal structure providing a convenient way to create error codes +struct ErrCode { + status_code: StatusCode, + error_type: ErrorType, + error_name: &'static str, +} + +impl ErrCode { + fn authentication(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::AuthenticationError } + } + + fn internal(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::InternalError } + } + + fn invalid(error_name: &'static str, status_code: StatusCode) -> ErrCode { + ErrCode { status_code, error_name, error_type: ErrorType::InvalidRequestError } + } +} + +#[allow(clippy::enum_variant_names)] +enum ErrorType { + InternalError, + InvalidRequestError, + AuthenticationError, +} + +impl fmt::Display for ErrorType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use ErrorType::*; + + match self { + InternalError => write!(f, "internal"), + InvalidRequestError => write!(f, "invalid_request"), + AuthenticationError => write!(f, "auth"), + } + } +} 
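+
+// A minimal sketch of how the `Code` machinery above is expected to surface once an error is
+// built: `from_msg` copies the error name, the error type and the documentation URL derived
+// from the `Code`. The message below is only an example value.
+#[cfg(test)]
+mod code_mapping_example {
+    use super::*;
+
+    #[test]
+    fn code_maps_to_name_type_and_url() {
+        // `IndexNotFound` is declared above as an invalid-request error answered with 404.
+        let err =
+            ResponseError::from_msg("Index `movies` not found.".to_string(), Code::IndexNotFound);
+        assert_eq!(err.error_code, "index_not_found");
+        assert_eq!(err.error_type, "invalid_request");
+        assert_eq!(err.error_link, "https://docs.meilisearch.com/errors#index_not_found");
+    }
+}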
diff --git a/dump/src/reader/v5/keys.rs b/dump/src/reader/v5/keys.rs new file mode 100644 index 000000000..12e44d85a --- /dev/null +++ b/dump/src/reader/v5/keys.rs @@ -0,0 +1,83 @@ +use serde::Deserialize; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::meta::{IndexUid, StarOr}; + +pub type KeyId = Uuid; + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Key { + pub description: Option, + pub name: Option, + pub uid: KeyId, + pub actions: Vec, + pub indexes: Vec>, + #[serde(with = "time::serde::rfc3339::option")] + pub expires_at: Option, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, +} + +#[derive(Copy, Clone, Deserialize, Debug, Eq, PartialEq, Hash)] +#[cfg_attr(test, derive(serde::Serialize))] +#[repr(u8)] +pub enum Action { + #[serde(rename = "*")] + All = 0, + #[serde(rename = "search")] + Search, + #[serde(rename = "documents.*")] + DocumentsAll, + #[serde(rename = "documents.add")] + DocumentsAdd, + #[serde(rename = "documents.get")] + DocumentsGet, + #[serde(rename = "documents.delete")] + DocumentsDelete, + #[serde(rename = "indexes.*")] + IndexesAll, + #[serde(rename = "indexes.create")] + IndexesAdd, + #[serde(rename = "indexes.get")] + IndexesGet, + #[serde(rename = "indexes.update")] + IndexesUpdate, + #[serde(rename = "indexes.delete")] + IndexesDelete, + #[serde(rename = "tasks.*")] + TasksAll, + #[serde(rename = "tasks.get")] + TasksGet, + #[serde(rename = "settings.*")] + SettingsAll, + #[serde(rename = "settings.get")] + SettingsGet, + #[serde(rename = "settings.update")] + SettingsUpdate, + #[serde(rename = "stats.*")] + StatsAll, + #[serde(rename = "stats.get")] + StatsGet, + #[serde(rename = "metrics.*")] + MetricsAll, + #[serde(rename = "metrics.get")] + MetricsGet, + #[serde(rename = "dumps.*")] + DumpsAll, + #[serde(rename = "dumps.create")] + DumpsCreate, + #[serde(rename = "version")] + Version, + #[serde(rename = "keys.create")] + KeysAdd, + #[serde(rename = "keys.get")] + KeysGet, + #[serde(rename = "keys.update")] + KeysUpdate, + #[serde(rename = "keys.delete")] + KeysDelete, +} diff --git a/dump/src/reader/v5/meta.rs b/dump/src/reader/v5/meta.rs new file mode 100644 index 000000000..cec05f57c --- /dev/null +++ b/dump/src/reader/v5/meta.rs @@ -0,0 +1,139 @@ +use std::fmt::{self, Display, Formatter}; +use std::marker::PhantomData; +use std::str::FromStr; + +use serde::de::Visitor; +use serde::{Deserialize, Deserializer}; +use uuid::Uuid; + +use super::settings::{Settings, Unchecked}; + +#[derive(Deserialize, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUuid { + pub uid: String, + pub index_meta: IndexMeta, +} + +#[derive(Deserialize, Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexMeta { + pub uuid: Uuid, + pub creation_task_id: usize, +} + +// There is one in each indexes under `meta.json`. 
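+// For illustration, such a file is expected to deserialize from something along these lines
+// (the values are only example data):
+//
+//   {"settings": {"displayedAttributes": ["*"], "rankingRules": ["words", "typo"]}, "primary_key": "id"}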
+#[derive(Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct DumpMeta { + pub settings: Settings, + pub primary_key: Option, +} + +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct IndexUid(pub String); + +impl TryFrom for IndexUid { + type Error = IndexUidFormatError; + + fn try_from(uid: String) -> Result { + if !uid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') + || uid.is_empty() + || uid.len() > 400 + { + Err(IndexUidFormatError { invalid_uid: uid }) + } else { + Ok(IndexUid(uid)) + } + } +} + +impl FromStr for IndexUid { + type Err = IndexUidFormatError; + + fn from_str(uid: &str) -> Result { + uid.to_string().try_into() + } +} + +impl From for String { + fn from(uid: IndexUid) -> Self { + uid.into_inner() + } +} + +#[derive(Debug)] +pub struct IndexUidFormatError { + pub invalid_uid: String, +} + +impl Display for IndexUidFormatError { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!( + f, + "invalid index uid `{}`, the uid must be an integer \ + or a string containing only alphanumeric characters \ + a-z A-Z 0-9, hyphens - and underscores _.", + self.invalid_uid, + ) + } +} + +impl std::error::Error for IndexUidFormatError {} + +/// A type that tries to match either a star (*) or +/// any other thing that implements `FromStr`. +#[derive(Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum StarOr { + Star, + Other(T), +} + +impl<'de, T, E> Deserialize<'de> for StarOr +where + T: FromStr, + E: Display, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + /// Serde can't differentiate between `StarOr::Star` and `StarOr::Other` without a tag. + /// Simply using `#[serde(untagged)]` + `#[serde(rename="*")]` will lead to attempting to + /// deserialize everything as a `StarOr::Other`, including "*". + /// [`#[serde(other)]`](https://serde.rs/variant-attrs.html#other) might have helped but is + /// not supported on untagged enums. + struct StarOrVisitor(PhantomData); + + impl<'de, T, FE> Visitor<'de> for StarOrVisitor + where + T: FromStr, + FE: Display, + { + type Value = StarOr; + + fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result { + formatter.write_str("a string") + } + + fn visit_str(self, v: &str) -> Result + where + SE: serde::de::Error, + { + match v { + "*" => Ok(StarOr::Star), + v => { + let other = FromStr::from_str(v).map_err(|e: T::Err| { + SE::custom(format!("Invalid `other` value: {}", e)) + })?; + Ok(StarOr::Other(other)) + } + } + } + } + + deserializer.deserialize_str(StarOrVisitor(PhantomData)) + } +} diff --git a/dump/src/reader/v5/mod.rs b/dump/src/reader/v5/mod.rs new file mode 100644 index 000000000..2265cbc63 --- /dev/null +++ b/dump/src/reader/v5/mod.rs @@ -0,0 +1,350 @@ +//! Here is what a dump v5 look like. +//! +//! ```text +//! . +//! ├── indexes +//! │   ├── 22c269d8-fbbd-4416-bd46-7c7c02849325 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   ├── 6d0471ba-2ed1-41de-8ea6-10db10fa2bb8 +//! │   │   ├── documents.jsonl +//! │   │   └── meta.json +//! │   └── f7d53ec4-0748-48e6-b66f-1fca9944b0fa +//! │   ├── documents.jsonl +//! │   └── meta.json +//! ├── index_uuids +//! │   └── data.jsonl +//! ├── instance-uid +//! ├── keys +//! ├── metadata.json +//! └── updates +//! ├── data.jsonl +//! └── updates_files +//! └── c83a004a-da98-4b94-b245-3256266c7281 +//! ``` +//! +//! Here is what `index_uuids/data.jsonl` looks like; +//! +//! ```json +//! 
{"uid":"dnd_spells","index_meta":{"uuid":"22c269d8-fbbd-4416-bd46-7c7c02849325","creation_task_id":9}} +//! {"uid":"movies","index_meta":{"uuid":"6d0471ba-2ed1-41de-8ea6-10db10fa2bb8","creation_task_id":1}} +//! {"uid":"products","index_meta":{"uuid":"f7d53ec4-0748-48e6-b66f-1fca9944b0fa","creation_task_id":4}} +//! ``` +//! + +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, ErrorKind, Seek, SeekFrom}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use tempfile::TempDir; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::compat::v5_to_v6::CompatV5ToV6; +use super::Document; +use crate::{Error, IndexMetadata, Result, Version}; + +pub mod errors; +pub mod keys; +pub mod meta; +pub mod settings; +pub mod tasks; + +pub type Settings = settings::Settings; +pub type Checked = settings::Checked; +pub type Unchecked = settings::Unchecked; + +pub type Task = tasks::Task; +pub type Key = keys::Key; + +// ===== Other types to clarify the code of the compat module +// everything related to the tasks +pub type Status = tasks::TaskStatus; +pub type Details = tasks::TaskDetails; + +// everything related to the settings +pub type Setting = settings::Setting; +pub type TypoTolerance = settings::TypoSettings; +pub type MinWordSizeForTypos = settings::MinWordSizeTyposSetting; + +// everything related to the api keys +pub type Action = keys::Action; +pub type StarOr = meta::StarOr; + +// everything related to the errors +pub type ResponseError = errors::ResponseError; +pub type Code = errors::Code; + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Metadata { + db_version: String, + index_db_size: usize, + update_db_size: usize, + #[serde(with = "time::serde::rfc3339")] + dump_date: OffsetDateTime, +} + +pub struct V5Reader { + dump: TempDir, + metadata: Metadata, + tasks: BufReader, + keys: BufReader, + index_uuid: Vec, +} + +impl V5Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let metadata = serde_json::from_reader(&*meta_file)?; + let index_uuid = File::open(dump.path().join("index_uuids/data.jsonl"))?; + let index_uuid = BufReader::new(index_uuid); + let index_uuid = index_uuid + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
}) + .collect::>>()?; + + Ok(V5Reader { + metadata, + tasks: BufReader::new( + File::open(dump.path().join("updates").join("data.jsonl")).unwrap(), + ), + keys: BufReader::new(File::open(dump.path().join("keys"))?), + index_uuid, + dump, + }) + } + + pub fn to_v6(self) -> CompatV5ToV6 { + CompatV5ToV6::new_v5(self) + } + + pub fn version(&self) -> Version { + Version::V5 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn instance_uid(&self) -> Result> { + match fs::read_to_string(self.dump.path().join("instance-uid")) { + Ok(uuid) => Ok(Some(Uuid::parse_str(&uuid)?)), + Err(e) if e.kind() == ErrorKind::NotFound => Ok(None), + Err(e) => Err(e.into()), + } + } + + pub fn indexes(&self) -> Result> + '_> { + Ok(self.index_uuid.iter().map(|index| -> Result<_> { + V5IndexReader::new( + index.uid.clone(), + &self.dump.path().join("indexes").join(index.index_meta.uuid.to_string()), + ) + })) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?)?; + if !task.is_finished() { + if let Some(uuid) = task.get_content_uuid() { + let update_file_path = self + .dump + .path() + .join("updates") + .join("updates_files") + .join(uuid.to_string()); + Ok(( + task, + Some( + Box::new(UpdateFile::new(&update_file_path)?) as Box + ), + )) + } else { + Ok((task, None)) + } + } else { + Ok((task, None)) + } + })) + } + + pub fn keys(&mut self) -> Result> + '_>> { + self.keys.seek(SeekFrom::Start(0))?; + Ok(Box::new( + (&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), + )) + } +} + +pub struct V5IndexReader { + metadata: IndexMetadata, + settings: Settings, + + documents: BufReader, +} + +impl V5IndexReader { + pub fn new(name: String, path: &Path) -> Result { + let meta = File::open(path.join("meta.json"))?; + let meta: meta::DumpMeta = serde_json::from_reader(meta)?; + + let metadata = IndexMetadata { + uid: name, + primary_key: meta.primary_key, + // FIXME: Iterate over the whole task queue to find the creation and last update date. + created_at: OffsetDateTime::now_utc(), + updated_at: OffsetDateTime::now_utc(), + }; + + let ret = V5IndexReader { + metadata, + settings: meta.settings.check(), + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) })) + } + + pub fn settings(&mut self) -> Result> { + Ok(self.settings.clone()) + } +} + +pub struct UpdateFile { + reader: BufReader, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + Ok(UpdateFile { reader: BufReader::new(File::open(path)?) 
}) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + (&mut self.reader) + .lines() + .map(|line| { + line.map_err(Error::from) + .and_then(|line| serde_json::from_str(&line).map_err(Error::from)) + }) + .next() + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fs::File; + use std::io::BufReader; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use tempfile::TempDir; + + use super::*; + + #[test] + #[ignore] + fn read_dump_v5() { + let dump = File::open("tests/assets/v5.dump").unwrap(); + let dir = TempDir::new().unwrap(); + let mut dump = BufReader::new(dump); + let gz = GzDecoder::new(&mut dump); + let mut archive = tar::Archive::new(gz); + archive.unpack(dir.path()).unwrap(); + + let mut dump = V5Reader::open(dir).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2022-10-04 15:55:10.344982459 +00:00:00"); + insta::assert_display_snapshot!(dump.instance_uid().unwrap().unwrap(), @"9e15e977-f2ae-4761-943f-1eaf75fd736d"); + + // tasks + let tasks = dump.tasks().collect::>>().unwrap(); + let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"e159863f0442b2e987ce37fbd57af76b"); + assert_eq!(update_files.len(), 22); + assert!(update_files[0].is_none()); // the dump creation + assert!(update_files[1].is_some()); // the enqueued document addition + assert!(update_files[2..].iter().all(|u| u.is_none())); // everything already processed + + let update_file = update_files.remove(1).unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(update_file), @"7b8889539b669c7b9ddba448bafa385d"); + + // keys + let keys = dump.keys().unwrap().collect::>>().unwrap(); + meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"091ddad754f3cc7cf1d03a477855e819"); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::>>().unwrap(); + // the index are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut products = indexes.pop().unwrap(); + let mut movies = indexes.pop().unwrap(); + let mut spells = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // products + insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "products", + "primaryKey": "sku", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", products.settings()), @"b392b928dab63468318b2bdaad844c5a"); + let documents = products.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"b01c8371aea4c7171af0d4d846a2bdca"); + + // movies + insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "movies", + "primaryKey": "id", + "createdAt": "[now]", + "updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"2f881248b7c3623e2ba2885dbf0b2c18"); + let documents = movies.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 200); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"e962baafd2fbae4cdd14e876053b0c5a"); + + // spells + insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###" + { + "uid": "dnd_spells", + "primaryKey": "index", + "createdAt": "[now]", + 
"updatedAt": "[now]" + } + "###); + + meili_snap::snapshot_hash!(format!("{:#?}", spells.settings()), @"ade154e63ab713de67919892917d3d9d"); + let documents = spells.documents().unwrap().collect::>>().unwrap(); + assert_eq!(documents.len(), 10); + meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce"); + } +} diff --git a/dump/src/reader/v5/settings.rs b/dump/src/reader/v5/settings.rs new file mode 100644 index 000000000..9a542149f --- /dev/null +++ b/dump/src/reader/v5/settings.rs @@ -0,0 +1,239 @@ +use std::collections::{BTreeMap, BTreeSet}; +use std::marker::PhantomData; + +use serde::{Deserialize, Deserializer, Serialize}; + +#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)] +pub struct Checked; + +#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)] +pub struct Unchecked; + +/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings +/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a +/// call to `check` will return a `Settings` from a `Settings`. +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))] +pub struct Settings { + #[serde(default)] + pub displayed_attributes: Setting>, + + #[serde(default)] + pub searchable_attributes: Setting>, + + #[serde(default)] + pub filterable_attributes: Setting>, + #[serde(default)] + pub sortable_attributes: Setting>, + #[serde(default)] + pub ranking_rules: Setting>, + #[serde(default)] + pub stop_words: Setting>, + #[serde(default)] + pub synonyms: Setting>>, + #[serde(default)] + pub distinct_attribute: Setting, + #[serde(default)] + pub typo_tolerance: Setting, + #[serde(default)] + pub faceting: Setting, + #[serde(default)] + pub pagination: Setting, + + #[serde(skip)] + pub _kind: PhantomData, +} + +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum Setting { + Set(T), + Reset, + NotSet, +} + +impl Default for Setting { + fn default() -> Self { + Self::NotSet + } +} + +impl Setting { + pub fn set(self) -> Option { + match self { + Self::Set(value) => Some(value), + _ => None, + } + } + + pub const fn as_ref(&self) -> Setting<&T> { + match *self { + Self::Set(ref value) => Setting::Set(value), + Self::Reset => Setting::Reset, + Self::NotSet => Setting::NotSet, + } + } + + pub const fn is_not_set(&self) -> bool { + matches!(self, Self::NotSet) + } + + /// If `Self` is `Reset`, then map self to `Set` with the provided `val`. 
+ pub fn or_reset(self, val: T) -> Self { + match self { + Self::Reset => Self::Set(val), + otherwise => otherwise, + } + } +} + +impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting { + fn deserialize(deserializer: D) -> std::result::Result + where + D: Deserializer<'de>, + { + Deserialize::deserialize(deserializer).map(|x| match x { + Some(x) => Self::Set(x), + None => Self::Reset, // Reset is forced by sending null value + }) + } +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct MinWordSizeTyposSetting { + #[serde(default)] + pub one_typo: Setting, + #[serde(default)] + pub two_typos: Setting, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct TypoSettings { + #[serde(default)] + pub enabled: Setting, + #[serde(default)] + pub min_word_size_for_typos: Setting, + #[serde(default)] + pub disable_on_words: Setting>, + #[serde(default)] + pub disable_on_attributes: Setting>, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct FacetingSettings { + #[serde(default)] + pub max_values_per_facet: Setting, +} + +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +pub struct PaginationSettings { + #[serde(default)] + pub max_total_hits: Setting, +} + +impl Settings { + pub fn cleared() -> Settings { + Settings { + displayed_attributes: Setting::Reset, + searchable_attributes: Setting::Reset, + filterable_attributes: Setting::Reset, + sortable_attributes: Setting::Reset, + ranking_rules: Setting::Reset, + stop_words: Setting::Reset, + synonyms: Setting::Reset, + distinct_attribute: Setting::Reset, + typo_tolerance: Setting::Reset, + faceting: Setting::Reset, + pagination: Setting::Reset, + _kind: PhantomData, + } + } + + pub fn into_unchecked(self) -> Settings { + let Self { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + faceting, + pagination, + .. 
+ } = self; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + synonyms, + distinct_attribute, + typo_tolerance, + faceting, + pagination, + _kind: PhantomData, + } + } +} + +impl Settings { + pub fn check(self) -> Settings { + let displayed_attributes = match self.displayed_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + let searchable_attributes = match self.searchable_attributes { + Setting::Set(fields) => { + if fields.iter().any(|f| f == "*") { + Setting::Reset + } else { + Setting::Set(fields) + } + } + otherwise => otherwise, + }; + + Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes: self.filterable_attributes, + sortable_attributes: self.sortable_attributes, + ranking_rules: self.ranking_rules, + stop_words: self.stop_words, + synonyms: self.synonyms, + distinct_attribute: self.distinct_attribute, + typo_tolerance: self.typo_tolerance, + faceting: self.faceting, + pagination: self.pagination, + _kind: PhantomData, + } + } +} diff --git a/dump/src/reader/v5/tasks.rs b/dump/src/reader/v5/tasks.rs new file mode 100644 index 000000000..125e20559 --- /dev/null +++ b/dump/src/reader/v5/tasks.rs @@ -0,0 +1,413 @@ +use serde::Deserialize; +use time::{Duration, OffsetDateTime}; +use uuid::Uuid; + +use super::errors::ResponseError; +use super::meta::IndexUid; +use super::settings::{Settings, Unchecked}; + +pub type TaskId = u32; +pub type BatchId = u32; + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub struct Task { + pub id: TaskId, + /// The name of the index the task is targeting. If it isn't targeting any index (i.e Dump task) + /// then this is None + // TODO: when next forward breaking dumps, it would be a good idea to move this field inside of + // the TaskContent. + pub content: TaskContent, + pub events: Vec, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +#[allow(clippy::large_enum_variant)] +pub enum TaskContent { + DocumentAddition { + index_uid: IndexUid, + content_uuid: Uuid, + merge_strategy: IndexDocumentsMethod, + primary_key: Option, + documents_count: usize, + allow_index_creation: bool, + }, + DocumentDeletion { + index_uid: IndexUid, + deletion: DocumentDeletion, + }, + SettingsUpdate { + index_uid: IndexUid, + settings: Settings, + /// Indicates whether the task was a deletion + is_deletion: bool, + allow_index_creation: bool, + }, + IndexDeletion { + index_uid: IndexUid, + }, + IndexCreation { + index_uid: IndexUid, + primary_key: Option, + }, + IndexUpdate { + index_uid: IndexUid, + primary_key: Option, + }, + Dump { + uid: String, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum IndexDocumentsMethod { + /// Replace the previous document with the new one, + /// removing all the already known attributes. + ReplaceDocuments, + + /// Merge the previous version of the document with the new version, + /// replacing old attributes values with the new ones and add the new attributes. 
+ UpdateDocuments, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum DocumentDeletion { + Clear, + Ids(Vec), +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum TaskEvent { + Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Batched { + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + batch_id: BatchId, + }, + Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), + Succeeded { + result: TaskResult, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, + Failed { + error: ResponseError, + #[serde(with = "time::serde::rfc3339")] + timestamp: OffsetDateTime, + }, +} + +#[derive(Clone, Debug, Deserialize, PartialEq, Eq)] +#[cfg_attr(test, derive(serde::Serialize))] +pub enum TaskResult { + DocumentAddition { indexed_documents: u64 }, + DocumentDeletion { deleted_documents: u64 }, + ClearAll { deleted_documents: u64 }, + Other, +} + +impl Task { + /// Return true when a task is finished. + /// A task is finished when its last state is either `Succeeded` or `Failed`. + pub fn is_finished(&self) -> bool { + self.events.last().map_or(false, |event| { + matches!(event, TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. }) + }) + } + + /// Return the content_uuid of the `Task` if there is one. + pub fn get_content_uuid(&self) -> Option { + match self { + Task { content: TaskContent::DocumentAddition { content_uuid, .. }, .. } => { + Some(*content_uuid) + } + _ => None, + } + } + + pub fn index_uid(&self) -> Option<&str> { + match &self.content { + TaskContent::DocumentAddition { index_uid, .. } + | TaskContent::DocumentDeletion { index_uid, .. } + | TaskContent::SettingsUpdate { index_uid, .. } + | TaskContent::IndexDeletion { index_uid } + | TaskContent::IndexCreation { index_uid, .. } + | TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()), + TaskContent::Dump { .. } => None, + } + } +} + +impl IndexUid { + pub fn into_inner(self) -> String { + self.0 + } + + /// Return a reference over the inner str. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::ops::Deref for IndexUid { + type Target = str; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +#[derive(Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +#[cfg_attr(test, serde(rename_all = "camelCase"))] +pub struct TaskView { + pub uid: TaskId, + pub index_uid: Option, + pub status: TaskStatus, + #[cfg_attr(test, serde(rename = "type"))] + pub task_type: TaskType, + #[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))] + pub details: Option, + #[cfg_attr(test, serde(skip_serializing_if = "Option::is_none"))] + pub error: Option, + #[cfg_attr(test, serde(serialize_with = "serialize_duration"))] + pub duration: Option, + #[cfg_attr(test, serde(serialize_with = "time::serde::rfc3339::serialize"))] + pub enqueued_at: OffsetDateTime, + #[cfg_attr(test, serde(serialize_with = "time::serde::rfc3339::option::serialize"))] + pub started_at: Option, + #[cfg_attr(test, serde(serialize_with = "time::serde::rfc3339::option::serialize"))] + pub finished_at: Option, +} + +impl From for TaskView { + fn from(task: Task) -> Self { + let index_uid = task.index_uid().map(String::from); + let Task { id, content, events } = task; + + let (task_type, mut details) = match content { + TaskContent::DocumentAddition { documents_count, .. 
} => { + let details = TaskDetails::DocumentAddition { + received_documents: documents_count, + indexed_documents: None, + }; + + (TaskType::DocumentAdditionOrUpdate, Some(details)) + } + TaskContent::DocumentDeletion { deletion: DocumentDeletion::Ids(ids), .. } => ( + TaskType::DocumentDeletion, + Some(TaskDetails::DocumentDeletion { + received_document_ids: ids.len(), + deleted_documents: None, + }), + ), + TaskContent::DocumentDeletion { deletion: DocumentDeletion::Clear, .. } => ( + TaskType::DocumentDeletion, + Some(TaskDetails::ClearAll { deleted_documents: None }), + ), + TaskContent::IndexDeletion { .. } => { + (TaskType::IndexDeletion, Some(TaskDetails::ClearAll { deleted_documents: None })) + } + TaskContent::SettingsUpdate { settings, .. } => { + (TaskType::SettingsUpdate, Some(TaskDetails::Settings { settings })) + } + TaskContent::IndexCreation { primary_key, .. } => { + (TaskType::IndexCreation, Some(TaskDetails::IndexInfo { primary_key })) + } + TaskContent::IndexUpdate { primary_key, .. } => { + (TaskType::IndexUpdate, Some(TaskDetails::IndexInfo { primary_key })) + } + TaskContent::Dump { uid } => { + (TaskType::DumpCreation, Some(TaskDetails::Dump { dump_uid: uid })) + } + }; + + // An event always has at least one event: "Created" + let (status, error, finished_at) = match events.last().unwrap() { + TaskEvent::Created(_) => (TaskStatus::Enqueued, None, None), + TaskEvent::Batched { .. } => (TaskStatus::Enqueued, None, None), + TaskEvent::Processing(_) => (TaskStatus::Processing, None, None), + TaskEvent::Succeeded { timestamp, result } => { + match (result, &mut details) { + ( + TaskResult::DocumentAddition { indexed_documents: num, .. }, + Some(TaskDetails::DocumentAddition { ref mut indexed_documents, .. }), + ) => { + indexed_documents.replace(*num); + } + ( + TaskResult::DocumentDeletion { deleted_documents: docs, .. }, + Some(TaskDetails::DocumentDeletion { ref mut deleted_documents, .. }), + ) => { + deleted_documents.replace(*docs); + } + ( + TaskResult::ClearAll { deleted_documents: docs }, + Some(TaskDetails::ClearAll { ref mut deleted_documents }), + ) => { + deleted_documents.replace(*docs); + } + _ => (), + } + (TaskStatus::Succeeded, None, Some(*timestamp)) + } + TaskEvent::Failed { timestamp, error } => { + match details { + Some(TaskDetails::DocumentDeletion { ref mut deleted_documents, .. }) => { + deleted_documents.replace(0); + } + Some(TaskDetails::ClearAll { ref mut deleted_documents, .. }) => { + deleted_documents.replace(0); + } + Some(TaskDetails::DocumentAddition { ref mut indexed_documents, .. 
}) => { + indexed_documents.replace(0); + } + _ => (), + } + (TaskStatus::Failed, Some(error.clone()), Some(*timestamp)) + } + }; + + let enqueued_at = match events.first() { + Some(TaskEvent::Created(ts)) => *ts, + _ => unreachable!("A task must always have a creation event."), + }; + + let started_at = events.iter().find_map(|e| match e { + TaskEvent::Processing(ts) => Some(*ts), + _ => None, + }); + + let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts)); + + Self { + uid: id, + index_uid, + status, + task_type, + details, + error, + duration, + enqueued_at, + started_at, + finished_at, + } + } +} + +#[derive(Debug, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub enum TaskType { + IndexCreation, + IndexUpdate, + IndexDeletion, + DocumentAdditionOrUpdate, + DocumentDeletion, + SettingsUpdate, + DumpCreation, +} + +impl From for TaskType { + fn from(other: TaskContent) -> Self { + match other { + TaskContent::IndexCreation { .. } => TaskType::IndexCreation, + TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate, + TaskContent::IndexDeletion { .. } => TaskType::IndexDeletion, + TaskContent::DocumentAddition { .. } => TaskType::DocumentAdditionOrUpdate, + TaskContent::DocumentDeletion { .. } => TaskType::DocumentDeletion, + TaskContent::SettingsUpdate { .. } => TaskType::SettingsUpdate, + TaskContent::Dump { .. } => TaskType::DumpCreation, + } + } +} + +#[derive(Debug, PartialEq, Eq, Deserialize)] +#[cfg_attr(test, derive(serde::Serialize))] +#[serde(rename_all = "camelCase")] +pub enum TaskStatus { + Enqueued, + Processing, + Succeeded, + Failed, +} + +#[derive(Debug)] +#[cfg_attr(test, derive(serde::Serialize))] +#[cfg_attr(test, serde(untagged))] +#[allow(clippy::large_enum_variant)] +pub enum TaskDetails { + #[cfg_attr(test, serde(rename_all = "camelCase"))] + DocumentAddition { received_documents: usize, indexed_documents: Option }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + Settings { + #[cfg_attr(test, serde(flatten))] + settings: Settings, + }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + IndexInfo { primary_key: Option }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + DocumentDeletion { received_document_ids: usize, deleted_documents: Option }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + ClearAll { deleted_documents: Option }, + #[cfg_attr(test, serde(rename_all = "camelCase"))] + Dump { dump_uid: String }, +} + +/// Serialize a `time::Duration` as a best effort ISO 8601 while waiting for +/// https://github.com/time-rs/time/issues/378. +/// This code is a port of the old code of time that was removed in 0.2. 
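+/// With this scheme, a duration of 30 seconds is rendered as `PT30S`, one day plus 2.5 seconds
+/// as `P1DT2.500S`, and a negative duration is serialized as `null`.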
+#[cfg(test)] +fn serialize_duration( + duration: &Option, + serializer: S, +) -> Result { + use std::fmt::Write; + + match duration { + Some(duration) => { + // technically speaking, negative duration is not valid ISO 8601 + if duration.is_negative() { + return serializer.serialize_none(); + } + + const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds(); + let secs = duration.whole_seconds(); + let days = secs / SECS_PER_DAY; + let secs = secs - days * SECS_PER_DAY; + let hasdate = days != 0; + let nanos = duration.subsec_nanoseconds(); + let hastime = (secs != 0 || nanos != 0) || !hasdate; + + // all the following unwrap can't fail + let mut res = String::new(); + write!(&mut res, "P").unwrap(); + + if hasdate { + write!(&mut res, "{}D", days).unwrap(); + } + + const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds(); + const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds(); + + if hastime { + if nanos == 0 { + write!(&mut res, "T{}S", secs).unwrap(); + } else if nanos % NANOS_PER_MILLI == 0 { + write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap(); + } else if nanos % NANOS_PER_MICRO == 0 { + write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap(); + } else { + write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap(); + } + } + + serializer.serialize_str(&res) + } + None => serializer.serialize_none(), + } +} diff --git a/dump/src/reader/v6/mod.rs b/dump/src/reader/v6/mod.rs new file mode 100644 index 000000000..1cb1d66db --- /dev/null +++ b/dump/src/reader/v6/mod.rs @@ -0,0 +1,191 @@ +use std::fs::{self, File}; +use std::io::{BufRead, BufReader, ErrorKind}; +use std::path::Path; + +pub use meilisearch_types::milli; +use tempfile::TempDir; +use time::OffsetDateTime; +use uuid::Uuid; + +use super::Document; +use crate::{Error, IndexMetadata, Result, Version}; + +pub type Metadata = crate::Metadata; + +pub type Settings = meilisearch_types::settings::Settings; +pub type Checked = meilisearch_types::settings::Checked; +pub type Unchecked = meilisearch_types::settings::Unchecked; + +pub type Task = crate::TaskDump; +pub type Key = meilisearch_types::keys::Key; + +// ===== Other types to clarify the code of the compat module +// everything related to the tasks +pub type Status = meilisearch_types::tasks::Status; +pub type Kind = crate::KindDump; +pub type Details = meilisearch_types::tasks::Details; + +// everything related to the settings +pub type Setting = meilisearch_types::milli::update::Setting; +pub type TypoTolerance = meilisearch_types::settings::TypoSettings; +pub type MinWordSizeForTypos = meilisearch_types::settings::MinWordSizeTyposSetting; +pub type FacetingSettings = meilisearch_types::settings::FacetingSettings; +pub type PaginationSettings = meilisearch_types::settings::PaginationSettings; + +// everything related to the api keys +pub type Action = meilisearch_types::keys::Action; +pub type StarOr = meilisearch_types::star_or::StarOr; +pub type IndexUid = meilisearch_types::index_uid::IndexUid; + +// everything related to the errors +pub type ResponseError = meilisearch_types::error::ResponseError; +pub type Code = meilisearch_types::error::Code; + +pub struct V6Reader { + dump: TempDir, + instance_uid: Option, + metadata: Metadata, + tasks: BufReader, + keys: BufReader, +} + +impl V6Reader { + pub fn open(dump: TempDir) -> Result { + let meta_file = fs::read(dump.path().join("metadata.json"))?; + let instance_uid = match fs::read_to_string(dump.path().join("instance_uid.uuid")) { + Ok(uuid) => 
Some(Uuid::parse_str(&uuid)?), + Err(e) if e.kind() == ErrorKind::NotFound => None, + Err(e) => return Err(e.into()), + }; + + Ok(V6Reader { + metadata: serde_json::from_reader(&*meta_file)?, + instance_uid, + tasks: BufReader::new(File::open(dump.path().join("tasks").join("queue.jsonl"))?), + keys: BufReader::new(File::open(dump.path().join("keys.jsonl"))?), + dump, + }) + } + + pub fn version(&self) -> Version { + Version::V6 + } + + pub fn date(&self) -> Option { + Some(self.metadata.dump_date) + } + + pub fn instance_uid(&self) -> Result> { + Ok(self.instance_uid) + } + + pub fn indexes(&self) -> Result> + '_>> { + let entries = fs::read_dir(self.dump.path().join("indexes"))?; + Ok(Box::new( + entries + .map(|entry| -> Result> { + let entry = entry?; + if entry.file_type()?.is_dir() { + let index = V6IndexReader::new( + entry.file_name().to_str().ok_or(Error::BadIndexName)?.to_string(), + &entry.path(), + )?; + Ok(Some(index)) + } else { + Ok(None) + } + }) + .filter_map(|entry| entry.transpose()), + )) + } + + pub fn tasks( + &mut self, + ) -> Box>)>> + '_> { + Box::new((&mut self.tasks).lines().map(|line| -> Result<_> { + let task: Task = serde_json::from_str(&line?).unwrap(); + + let update_file_path = self + .dump + .path() + .join("tasks") + .join("update_files") + .join(format!("{}.jsonl", task.uid)); + + if update_file_path.exists() { + Ok(( + task, + Some(Box::new(UpdateFile::new(&update_file_path).unwrap()) + as Box), + )) + } else { + Ok((task, None)) + } + })) + } + + pub fn keys(&mut self) -> Box> + '_> { + Box::new( + (&mut self.keys).lines().map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) }), + ) + } +} + +pub struct UpdateFile { + reader: BufReader, +} + +impl UpdateFile { + fn new(path: &Path) -> Result { + Ok(UpdateFile { reader: BufReader::new(File::open(path)?) }) + } +} + +impl Iterator for UpdateFile { + type Item = Result; + + fn next(&mut self) -> Option { + (&mut self.reader) + .lines() + .map(|line| { + line.map_err(Error::from) + .and_then(|line| serde_json::from_str(&line).map_err(Error::from)) + }) + .next() + } +} + +pub struct V6IndexReader { + metadata: IndexMetadata, + documents: BufReader, + settings: BufReader, +} + +impl V6IndexReader { + pub fn new(_name: String, path: &Path) -> Result { + let metadata = File::open(path.join("metadata.json"))?; + + let ret = V6IndexReader { + metadata: serde_json::from_reader(metadata)?, + documents: BufReader::new(File::open(path.join("documents.jsonl"))?), + settings: BufReader::new(File::open(path.join("settings.json"))?), + }; + + Ok(ret) + } + + pub fn metadata(&self) -> &IndexMetadata { + &self.metadata + } + + pub fn documents(&mut self) -> Result> + '_> { + Ok((&mut self.documents) + .lines() + .map(|line| -> Result<_> { Ok(serde_json::from_str(&line?)?) 
})) + } + + pub fn settings(&mut self) -> Result> { + let settings: Settings = serde_json::from_reader(&mut self.settings)?; + Ok(settings.check()) + } +} diff --git a/dump/src/writer.rs b/dump/src/writer.rs new file mode 100644 index 000000000..29aa2508d --- /dev/null +++ b/dump/src/writer.rs @@ -0,0 +1,350 @@ +use std::fs::{self, File}; +use std::io::{BufWriter, Write}; +use std::path::PathBuf; + +use flate2::write::GzEncoder; +use flate2::Compression; +use meilisearch_types::keys::Key; +use meilisearch_types::settings::{Checked, Settings}; +use serde_json::{Map, Value}; +use tempfile::TempDir; +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::reader::Document; +use crate::{IndexMetadata, Metadata, Result, TaskDump, CURRENT_DUMP_VERSION}; + +pub struct DumpWriter { + dir: TempDir, +} + +impl DumpWriter { + pub fn new(instance_uuid: Option) -> Result { + let dir = TempDir::new()?; + + if let Some(instance_uuid) = instance_uuid { + fs::write( + dir.path().join("instance_uid.uuid"), + &instance_uuid.as_hyphenated().to_string(), + )?; + } + + let metadata = Metadata { + dump_version: CURRENT_DUMP_VERSION, + db_version: env!("CARGO_PKG_VERSION").to_string(), + dump_date: OffsetDateTime::now_utc(), + }; + fs::write(dir.path().join("metadata.json"), serde_json::to_string(&metadata)?)?; + + std::fs::create_dir(&dir.path().join("indexes"))?; + + Ok(DumpWriter { dir }) + } + + pub fn create_index(&self, index_name: &str, metadata: &IndexMetadata) -> Result { + IndexWriter::new(self.dir.path().join("indexes").join(index_name), metadata) + } + + pub fn create_keys(&self) -> Result { + KeyWriter::new(self.dir.path().to_path_buf()) + } + + pub fn create_tasks_queue(&self) -> Result { + TaskWriter::new(self.dir.path().join("tasks")) + } + + pub fn persist_to(self, mut writer: impl Write) -> Result<()> { + let gz_encoder = GzEncoder::new(&mut writer, Compression::default()); + let mut tar_encoder = tar::Builder::new(gz_encoder); + tar_encoder.append_dir_all(".", self.dir.path())?; + let gz_encoder = tar_encoder.into_inner()?; + gz_encoder.finish()?; + writer.flush()?; + + Ok(()) + } +} + +pub struct KeyWriter { + keys: BufWriter, +} + +impl KeyWriter { + pub(crate) fn new(path: PathBuf) -> Result { + let keys = File::create(path.join("keys.jsonl"))?; + Ok(KeyWriter { keys: BufWriter::new(keys) }) + } + + pub fn push_key(&mut self, key: &Key) -> Result<()> { + self.keys.write_all(&serde_json::to_vec(key)?)?; + self.keys.write_all(b"\n")?; + Ok(()) + } + + pub fn flush(mut self) -> Result<()> { + self.keys.flush()?; + Ok(()) + } +} + +pub struct TaskWriter { + queue: BufWriter, + update_files: PathBuf, +} + +impl TaskWriter { + pub(crate) fn new(path: PathBuf) -> Result { + std::fs::create_dir(&path)?; + + let queue = File::create(path.join("queue.jsonl"))?; + let update_files = path.join("update_files"); + std::fs::create_dir(&update_files)?; + + Ok(TaskWriter { queue: BufWriter::new(queue), update_files }) + } + + /// Pushes tasks in the dump. + /// If the tasks has an associated `update_file` it'll use the `task_id` as its name. 
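+    /// An illustrative sequence: obtain the writer with `create_tasks_queue()`, call `push_task`
+    /// for every task, write the task's documents into the returned `UpdateFile` with
+    /// `push_document`, then `flush` the update file and the queue.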
+ pub fn push_task(&mut self, task: &TaskDump) -> Result { + self.queue.write_all(&serde_json::to_vec(task)?)?; + self.queue.write_all(b"\n")?; + + Ok(UpdateFile::new(self.update_files.join(format!("{}.jsonl", task.uid)))) + } + + pub fn flush(mut self) -> Result<()> { + self.queue.flush()?; + Ok(()) + } +} + +pub struct UpdateFile { + path: PathBuf, + writer: Option>, +} + +impl UpdateFile { + pub(crate) fn new(path: PathBuf) -> UpdateFile { + UpdateFile { path, writer: None } + } + + pub fn push_document(&mut self, document: &Document) -> Result<()> { + if let Some(writer) = self.writer.as_mut() { + writer.write_all(&serde_json::to_vec(document)?)?; + writer.write_all(b"\n")?; + } else { + let file = File::create(&self.path).unwrap(); + self.writer = Some(BufWriter::new(file)); + self.push_document(document)?; + } + Ok(()) + } + + pub fn flush(self) -> Result<()> { + if let Some(mut writer) = self.writer { + writer.flush()?; + } + Ok(()) + } +} + +pub struct IndexWriter { + documents: BufWriter, + settings: File, +} + +impl IndexWriter { + pub(self) fn new(path: PathBuf, metadata: &IndexMetadata) -> Result { + std::fs::create_dir(&path)?; + + let metadata_file = File::create(path.join("metadata.json"))?; + serde_json::to_writer(metadata_file, metadata)?; + + let documents = File::create(path.join("documents.jsonl"))?; + let settings = File::create(path.join("settings.json"))?; + + Ok(IndexWriter { documents: BufWriter::new(documents), settings }) + } + + pub fn push_document(&mut self, document: &Map) -> Result<()> { + serde_json::to_writer(&mut self.documents, document)?; + self.documents.write_all(b"\n")?; + Ok(()) + } + + pub fn flush(&mut self) -> Result<()> { + self.documents.flush()?; + Ok(()) + } + + pub fn settings(mut self, settings: &Settings) -> Result<()> { + self.settings.write_all(&serde_json::to_vec(&settings)?)?; + Ok(()) + } +} + +#[cfg(test)] +pub(crate) mod test { + use std::fmt::Write; + use std::io::BufReader; + use std::path::Path; + use std::str::FromStr; + + use flate2::bufread::GzDecoder; + use meili_snap::insta; + use meilisearch_types::settings::Unchecked; + + use super::*; + use crate::reader::Document; + use crate::test::{ + create_test_api_keys, create_test_documents, create_test_dump, create_test_instance_uid, + create_test_settings, create_test_tasks, + }; + + fn create_directory_hierarchy(dir: &Path) -> String { + let mut ret = String::new(); + writeln!(ret, ".").unwrap(); + ret.push_str(&_create_directory_hierarchy(dir, 0)); + ret + } + + fn _create_directory_hierarchy(dir: &Path, depth: usize) -> String { + let mut ret = String::new(); + + // the entries are not guarenteed to be returned in the same order thus we need to sort them. + let mut entries = + fs::read_dir(dir).unwrap().collect::, _>>().unwrap(); + + // I want the directories first and then sort by name. 
+ entries.sort_by(|a, b| { + let (aft, bft) = (a.file_type().unwrap(), b.file_type().unwrap()); + + if aft.is_dir() && bft.is_dir() { + a.file_name().cmp(&b.file_name()) + } else if aft.is_file() && bft.is_dir() { + std::cmp::Ordering::Greater + } else if bft.is_file() && aft.is_dir() { + std::cmp::Ordering::Less + } else { + a.file_name().cmp(&b.file_name()) + } + }); + + for (idx, entry) in entries.iter().enumerate() { + let mut ident = String::new(); + + for _ in 0..depth { + ident.push('│'); + ident.push_str(&" ".repeat(4)); + } + if idx == entries.len() - 1 { + ident.push('└'); + } else { + ident.push('├'); + } + ident.push_str(&"-".repeat(4)); + + let name = entry.file_name().into_string().unwrap(); + let file_type = entry.file_type().unwrap(); + let is_dir = if file_type.is_dir() { "/" } else { "" }; + + assert!(!file_type.is_symlink()); + writeln!(ret, "{ident} {name}{is_dir}").unwrap(); + + if file_type.is_dir() { + ret.push_str(&_create_directory_hierarchy(&entry.path(), depth + 1)); + } + } + ret + } + + #[test] + #[ignore] + fn test_creating_dump() { + let file = create_test_dump(); + let mut file = BufReader::new(file); + + // ============ ensuring we wrote everything in the correct place. + let dump = tempfile::tempdir().unwrap(); + + let gz = GzDecoder::new(&mut file); + let mut tar = tar::Archive::new(gz); + tar.unpack(dump.path()).unwrap(); + + let dump_path = dump.path(); + + // ==== checking global file hierarchy (we want to be sure there isn't too many files or too few) + insta::assert_display_snapshot!(create_directory_hierarchy(dump_path), @r###" + . + ├---- indexes/ + │ └---- doggos/ + │ │ ├---- documents.jsonl + │ │ ├---- metadata.json + │ │ └---- settings.json + ├---- tasks/ + │ ├---- update_files/ + │ │ └---- 1.jsonl + │ └---- queue.jsonl + ├---- instance_uid.uuid + ├---- keys.jsonl + └---- metadata.json + "###); + + // ==== checking the top level infos + let metadata = fs::read_to_string(dump_path.join("metadata.json")).unwrap(); + let metadata: Metadata = serde_json::from_str(&metadata).unwrap(); + insta::assert_json_snapshot!(metadata, { ".dumpDate" => "[date]" }, @r###" + { + "dumpVersion": "V6", + "dbVersion": "0.29.0", + "dumpDate": "[date]" + } + "###); + + let instance_uid = fs::read_to_string(dump_path.join("instance_uid.uuid")).unwrap(); + assert_eq!(Uuid::from_str(&instance_uid).unwrap(), create_test_instance_uid()); + + // ==== checking the index + let docs = fs::read_to_string(dump_path.join("indexes/doggos/documents.jsonl")).unwrap(); + for (document, expected) in docs.lines().zip(create_test_documents()) { + assert_eq!(serde_json::from_str::>(document).unwrap(), expected); + } + let test_settings = + fs::read_to_string(dump_path.join("indexes/doggos/settings.json")).unwrap(); + assert_eq!( + serde_json::from_str::>(&test_settings).unwrap(), + create_test_settings().into_unchecked() + ); + let metadata = fs::read_to_string(dump_path.join("indexes/doggos/metadata.json")).unwrap(); + let metadata: IndexMetadata = serde_json::from_str(&metadata).unwrap(); + insta::assert_json_snapshot!(metadata, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }, @r###" + { + "uid": "doggo", + "primaryKey": null, + "createdAt": "[date]", + "updatedAt": "[date]" + } + "###); + + // ==== checking the task queue + let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap(); + for (task, expected) in tasks_queue.lines().zip(create_test_tasks()) { + assert_eq!(serde_json::from_str::(task).unwrap(), expected.0); + + if let Some(expected_update) = 
expected.1 { + let path = dump_path.join(format!("tasks/update_files/{}.jsonl", expected.0.uid)); + println!("trying to open {}", path.display()); + let update = fs::read_to_string(path).unwrap(); + let documents: Vec = + update.lines().map(|line| serde_json::from_str(line).unwrap()).collect(); + assert_eq!(documents, expected_update); + } + } + + // ==== checking the keys + let keys = fs::read_to_string(dump_path.join("keys.jsonl")).unwrap(); + for (key, expected) in keys.lines().zip(create_test_api_keys()) { + assert_eq!(serde_json::from_str::(key).unwrap(), expected); + } + } +} diff --git a/dump/tests/assets/v2.dump b/dump/tests/assets/v2.dump new file mode 100644 index 000000000..eacea80a5 Binary files /dev/null and b/dump/tests/assets/v2.dump differ diff --git a/dump/tests/assets/v3.dump b/dump/tests/assets/v3.dump new file mode 100644 index 000000000..abf6fdf9f Binary files /dev/null and b/dump/tests/assets/v3.dump differ diff --git a/dump/tests/assets/v4.dump b/dump/tests/assets/v4.dump new file mode 100644 index 000000000..9dd276243 Binary files /dev/null and b/dump/tests/assets/v4.dump differ diff --git a/dump/tests/assets/v5.dump b/dump/tests/assets/v5.dump new file mode 100644 index 000000000..9b60049e4 Binary files /dev/null and b/dump/tests/assets/v5.dump differ diff --git a/file-store/Cargo.toml b/file-store/Cargo.toml new file mode 100644 index 000000000..0110a00b1 --- /dev/null +++ b/file-store/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "file-store" +version = "0.30.0" +edition = "2021" + +[dependencies] +tempfile = "3.3.0" +thiserror = "1.0.30" +uuid = { version = "1.1.2", features = ["serde", "v4"] } + +[dev-dependencies] +faux = "0.1.8" diff --git a/file-store/src/lib.rs b/file-store/src/lib.rs new file mode 100644 index 000000000..e05694c92 --- /dev/null +++ b/file-store/src/lib.rs @@ -0,0 +1,132 @@ +use std::collections::BTreeSet; +use std::fs::File as StdFile; +use std::ops::{Deref, DerefMut}; +use std::path::{Path, PathBuf}; +use std::str::FromStr; + +use tempfile::NamedTempFile; +use uuid::Uuid; + +const UPDATE_FILES_PATH: &str = "updates/updates_files"; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error(transparent)] + IoError(#[from] std::io::Error), + #[error(transparent)] + PersistError(#[from] tempfile::PersistError), +} + +pub type Result = std::result::Result; + +impl Deref for File { + type Target = NamedTempFile; + + fn deref(&self) -> &Self::Target { + &self.file + } +} + +impl DerefMut for File { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.file + } +} + +#[cfg_attr(test, faux::create)] +#[derive(Clone, Debug)] +pub struct FileStore { + path: PathBuf, +} + +#[cfg(not(test))] +impl FileStore { + pub fn new(path: impl AsRef) -> Result { + let path = path.as_ref().to_path_buf(); + std::fs::create_dir_all(&path)?; + Ok(FileStore { path }) + } +} + +#[cfg_attr(test, faux::methods)] +impl FileStore { + /// Creates a new temporary update file. + /// A call to `persist` is needed to persist the file in the database. + pub fn new_update(&self) -> Result<(Uuid, File)> { + let file = NamedTempFile::new_in(&self.path)?; + let uuid = Uuid::new_v4(); + let path = self.path.join(uuid.to_string()); + let update_file = File { file, path }; + + Ok((uuid, update_file)) + } + + /// Creates a new temporary update file with the given Uuid. + /// A call to `persist` is needed to persist the file in the database. 
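+    /// An illustrative flow (path and content are example values):
+    ///
+    /// ```ignore
+    /// let store = FileStore::new("/tmp/data.ms/update_files")?;
+    /// let (uuid, mut file) = store.new_update_with_uuid(42)?;
+    /// writeln!(file, r#"{{"id": 1}}"#)?;
+    /// file.persist()?;
+    /// let readable = store.get_update(uuid)?;
+    /// ```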
+ pub fn new_update_with_uuid(&self, uuid: u128) -> Result<(Uuid, File)> { + let file = NamedTempFile::new_in(&self.path)?; + let uuid = Uuid::from_u128(uuid); + let path = self.path.join(uuid.to_string()); + let update_file = File { file, path }; + + Ok((uuid, update_file)) + } + + /// Returns the file corresponding to the requested uuid. + pub fn get_update(&self, uuid: Uuid) -> Result { + let path = self.get_update_path(uuid); + let file = StdFile::open(path)?; + Ok(file) + } + + /// Returns the path that correspond to this uuid, the path could not exists. + pub fn get_update_path(&self, uuid: Uuid) -> PathBuf { + self.path.join(uuid.to_string()) + } + + /// Copies the content of the update file pointed to by `uuid` to the `dst` directory. + pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef) -> Result<()> { + let src = self.path.join(uuid.to_string()); + let mut dst = dst.as_ref().join(UPDATE_FILES_PATH); + std::fs::create_dir_all(&dst)?; + dst.push(uuid.to_string()); + std::fs::copy(src, dst)?; + Ok(()) + } + + pub fn get_size(&self, uuid: Uuid) -> Result { + Ok(self.get_update(uuid)?.metadata()?.len()) + } + + pub fn delete(&self, uuid: Uuid) -> Result<()> { + let path = self.path.join(uuid.to_string()); + std::fs::remove_file(path)?; + Ok(()) + } + + /// List the Uuids of the files in the FileStore + /// + /// This function is meant to be used by tests only. + #[doc(hidden)] + pub fn __all_uuids(&self) -> BTreeSet { + let mut uuids = BTreeSet::new(); + for entry in self.path.read_dir().unwrap() { + let entry = entry.unwrap(); + let uuid = Uuid::from_str(entry.file_name().to_str().unwrap()).unwrap(); + uuids.insert(uuid); + } + uuids + } +} + +pub struct File { + path: PathBuf, + file: NamedTempFile, +} + +impl File { + pub fn persist(self) -> Result<()> { + self.file.persist(&self.path)?; + Ok(()) + } +} diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml new file mode 100644 index 000000000..e46712703 --- /dev/null +++ b/index-scheduler/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "index-scheduler" +version = "0.30.0" +edition = "2021" + +[dependencies] +anyhow = "1.0.64" +bincode = "1.3.3" +csv = "1.1.6" +derive_builder = "0.11.2" +dump = { path = "../dump" } +enum-iterator = "1.1.3" +file-store = { path = "../file-store" } +log = "0.4.14" +meilisearch-types = { path = "../meilisearch-types" } +roaring = { version = "0.10.0", features = ["serde"] } +serde = { version = "1.0.136", features = ["derive"] } +serde_json = { version = "1.0.85", features = ["preserve_order"] } +synchronoise = "1.0.1" +tempfile = "3.3.0" +thiserror = "1.0.30" +time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } +uuid = { version = "1.1.2", features = ["serde", "v4"] } + +[dev-dependencies] +big_s = "1.0.2" +crossbeam = "0.8.2" +insta = { version = "1.19.1", features = ["json", "redactions"] } +meili-snap = { path = "../meili-snap" } +nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f13885548fb415ead8fbb447e9e6d9314000a"} diff --git a/index-scheduler/src/autobatcher.rs b/index-scheduler/src/autobatcher.rs new file mode 100644 index 000000000..d1ed691c6 --- /dev/null +++ b/index-scheduler/src/autobatcher.rs @@ -0,0 +1,727 @@ +/*! +The autobatcher is responsible for combining the next enqueued +tasks affecting a single index into a [batch](crate::batch::Batch). + +The main function of the autobatcher is [`next_autobatch`]. 
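+
+For instance (an illustrative scenario rather than an exhaustive specification): a run of
+consecutive document additions enqueued with the same indexing method is expected to be grouped
+into a single `BatchKind::DocumentImport` batch, while a task of another kind either closes that
+batch or turns it into a different `BatchKind`.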
+*/
+
+use std::ops::ControlFlow::{self, Break, Continue};
+
+use meilisearch_types::milli::update::IndexDocumentsMethod::{
+    self, ReplaceDocuments, UpdateDocuments,
+};
+use meilisearch_types::tasks::TaskId;
+
+use crate::KindWithContent;
+
+/// Succinctly describes a task's [`Kind`](meilisearch_types::tasks::Kind)
+/// for the purpose of simplifying the implementation of the autobatcher.
+///
+/// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`].
+enum AutobatchKind {
+    DocumentImport { method: IndexDocumentsMethod, allow_index_creation: bool },
+    DocumentDeletion,
+    DocumentClear,
+    Settings { allow_index_creation: bool },
+    IndexCreation,
+    IndexDeletion,
+    IndexUpdate,
+    IndexSwap,
+}
+
+impl AutobatchKind {
+    #[rustfmt::skip]
+    fn allow_index_creation(&self) -> Option<bool> {
+        match self {
+            AutobatchKind::DocumentImport { allow_index_creation, .. }
+            | AutobatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
+            _ => None,
+        }
+    }
+}
+
+impl From<KindWithContent> for AutobatchKind {
+    fn from(kind: KindWithContent) -> Self {
+        match kind {
+            KindWithContent::DocumentAdditionOrUpdate { method, allow_index_creation, .. } => {
+                AutobatchKind::DocumentImport { method, allow_index_creation }
+            }
+            KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
+            KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
+            KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
+                AutobatchKind::Settings {
+                    allow_index_creation: allow_index_creation && !is_deletion,
+                }
+            }
+            KindWithContent::IndexDeletion { .. } => AutobatchKind::IndexDeletion,
+            KindWithContent::IndexCreation { .. } => AutobatchKind::IndexCreation,
+            KindWithContent::IndexUpdate { .. } => AutobatchKind::IndexUpdate,
+            KindWithContent::IndexSwap { .. } => AutobatchKind::IndexSwap,
+            KindWithContent::TaskCancelation { .. }
+            | KindWithContent::TaskDeletion { .. }
+            | KindWithContent::DumpCreation { .. }
+            | KindWithContent::SnapshotCreation => {
+                panic!("The autobatcher should never be called with tasks that don't apply to an index.")
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum BatchKind {
+    DocumentClear {
+        ids: Vec<TaskId>,
+    },
+    DocumentImport {
+        method: IndexDocumentsMethod,
+        allow_index_creation: bool,
+        import_ids: Vec<TaskId>,
+    },
+    DocumentDeletion {
+        deletion_ids: Vec<TaskId>,
+    },
+    ClearAndSettings {
+        other: Vec<TaskId>,
+        allow_index_creation: bool,
+        settings_ids: Vec<TaskId>,
+    },
+    SettingsAndDocumentImport {
+        settings_ids: Vec<TaskId>,
+        method: IndexDocumentsMethod,
+        allow_index_creation: bool,
+        import_ids: Vec<TaskId>,
+    },
+    Settings {
+        allow_index_creation: bool,
+        settings_ids: Vec<TaskId>,
+    },
+    IndexDeletion {
+        ids: Vec<TaskId>,
+    },
+    IndexCreation {
+        id: TaskId,
+    },
+    IndexUpdate {
+        id: TaskId,
+    },
+    IndexSwap {
+        id: TaskId,
+    },
+}
+
+impl BatchKind {
+    #[rustfmt::skip]
+    fn allow_index_creation(&self) -> Option<bool> {
+        match self {
+            BatchKind::DocumentImport { allow_index_creation, .. }
+            | BatchKind::ClearAndSettings { allow_index_creation, .. }
+            | BatchKind::SettingsAndDocumentImport { allow_index_creation, .. }
+            | BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
+            _ => None,
+        }
+    }
+}
+
+impl BatchKind {
+    /// Returns a `ControlFlow::Break` if you must stop right now.
+    /// The boolean tells you whether an index has been created by the batched task.
+    /// To ease the writing of the code, `true` can be returned when you don't need to create an index
+    /// but `false` can't be returned if you need to create an index.
+ // TODO use an AutoBatchKind as input + pub fn new( + task_id: TaskId, + kind: KindWithContent, + ) -> (ControlFlow, bool) { + use AutobatchKind as K; + + match AutobatchKind::from(kind) { + K::IndexCreation => (Break(BatchKind::IndexCreation { id: task_id }), true), + K::IndexDeletion => (Break(BatchKind::IndexDeletion { ids: vec![task_id] }), false), + K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false), + K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false), + K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false), + K::DocumentImport { method, allow_index_creation } => ( + Continue(BatchKind::DocumentImport { + method, + allow_index_creation, + import_ids: vec![task_id], + }), + allow_index_creation, + ), + K::DocumentDeletion => { + (Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false) + } + K::Settings { allow_index_creation } => ( + Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }), + allow_index_creation, + ), + } + } + + /// Returns a `ControlFlow::Break` if you must stop right now. + /// The boolean tell you if an index has been created by the batched task. + /// To ease the writting of the code. `true` can be returned when you don't need to create an index + /// but false can't be returned if you needs to create an index. + #[rustfmt::skip] + fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool) -> ControlFlow { + use AutobatchKind as K; + + match (self, kind) { + // We don't batch any of these operations + (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this), + // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists. + (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => { + Break(this) + }, + // The index deletion can batch with everything but must stop after + ( + BatchKind::DocumentClear { mut ids } + | BatchKind::DocumentDeletion { deletion_ids: mut ids } + | BatchKind::DocumentImport { method: _, allow_index_creation: _, import_ids: mut ids } + | BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids }, + K::IndexDeletion, + ) => { + ids.push(id); + Break(BatchKind::IndexDeletion { ids }) + } + ( + BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other } + | BatchKind::SettingsAndDocumentImport { import_ids: mut ids, method: _, allow_index_creation: _, settings_ids: mut other }, + K::IndexDeletion, + ) => { + ids.push(id); + ids.append(&mut other); + Break(BatchKind::IndexDeletion { ids }) + } + + ( + BatchKind::DocumentClear { mut ids }, + K::DocumentClear | K::DocumentDeletion, + ) => { + ids.push(id); + Continue(BatchKind::DocumentClear { ids }) + } + ( + this @ BatchKind::DocumentClear { .. }, + K::DocumentImport { .. } | K::Settings { .. }, + ) => Break(this), + ( + BatchKind::DocumentImport { method: _, allow_index_creation: _, import_ids: mut ids }, + K::DocumentClear, + ) => { + ids.push(id); + Continue(BatchKind::DocumentClear { ids }) + } + + // we can autobatch the same kind of document additions / updates + ( + BatchKind::DocumentImport { method: ReplaceDocuments, allow_index_creation, mut import_ids }, + K::DocumentImport { method: ReplaceDocuments, .. 
}, + ) => { + import_ids.push(id); + Continue(BatchKind::DocumentImport { + method: ReplaceDocuments, + allow_index_creation, + import_ids, + }) + } + ( + BatchKind::DocumentImport { method: UpdateDocuments, allow_index_creation, mut import_ids }, + K::DocumentImport { method: UpdateDocuments, .. }, + ) => { + import_ids.push(id); + Continue(BatchKind::DocumentImport { + method: UpdateDocuments, + allow_index_creation, + import_ids, + }) + } + + // but we can't autobatch documents if it's not the same kind + // this match branch MUST be AFTER the previous one + ( + this @ BatchKind::DocumentImport { .. }, + K::DocumentDeletion | K::DocumentImport { .. }, + ) => Break(this), + + ( + BatchKind::DocumentImport { method, allow_index_creation, import_ids }, + K::Settings { .. }, + ) => Continue(BatchKind::SettingsAndDocumentImport { + settings_ids: vec![id], + method, + allow_index_creation, + import_ids, + }), + + (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => { + deletion_ids.push(id); + Continue(BatchKind::DocumentClear { ids: deletion_ids }) + } + (this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. }) => Break(this), + (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => { + deletion_ids.push(id); + Continue(BatchKind::DocumentDeletion { deletion_ids }) + } + (this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this), + + ( + BatchKind::Settings { settings_ids, allow_index_creation }, + K::DocumentClear, + ) => Continue(BatchKind::ClearAndSettings { + settings_ids, + allow_index_creation, + other: vec![id], + }), + ( + this @ BatchKind::Settings { .. }, + K::DocumentImport { .. } | K::DocumentDeletion, + ) => Break(this), + ( + BatchKind::Settings { mut settings_ids, allow_index_creation }, + K::Settings { .. }, + ) => { + settings_ids.push(id); + Continue(BatchKind::Settings { + allow_index_creation, + settings_ids, + }) + } + + ( + BatchKind::ClearAndSettings { mut other, settings_ids, allow_index_creation }, + K::DocumentClear, + ) => { + other.push(id); + Continue(BatchKind::ClearAndSettings { + other, + settings_ids, + allow_index_creation, + }) + } + (this @ BatchKind::ClearAndSettings { .. }, K::DocumentImport { .. }) => Break(this), + ( + BatchKind::ClearAndSettings { + mut other, + settings_ids, + allow_index_creation, + }, + K::DocumentDeletion, + ) => { + other.push(id); + Continue(BatchKind::ClearAndSettings { + other, + settings_ids, + allow_index_creation, + }) + } + ( + BatchKind::ClearAndSettings { mut settings_ids, other, allow_index_creation }, + K::Settings { .. }, + ) => { + settings_ids.push(id); + Continue(BatchKind::ClearAndSettings { + other, + settings_ids, + allow_index_creation, + }) + } + ( + BatchKind::SettingsAndDocumentImport { settings_ids, method: _, import_ids: mut other, allow_index_creation }, + K::DocumentClear, + ) => { + other.push(id); + Continue(BatchKind::ClearAndSettings { + settings_ids, + other, + allow_index_creation, + }) + } + + ( + BatchKind::SettingsAndDocumentImport { settings_ids, method: ReplaceDocuments, mut import_ids, allow_index_creation }, + K::DocumentImport { method: ReplaceDocuments, .. }, + ) => { + import_ids.push(id); + Continue(BatchKind::SettingsAndDocumentImport { + settings_ids, + method: ReplaceDocuments, + allow_index_creation, + import_ids, + }) + } + ( + BatchKind::SettingsAndDocumentImport { settings_ids, method: UpdateDocuments, allow_index_creation, mut import_ids }, + K::DocumentImport { method: UpdateDocuments, .. 
}, + ) => { + import_ids.push(id); + Continue(BatchKind::SettingsAndDocumentImport { + settings_ids, + method: UpdateDocuments, + allow_index_creation, + import_ids, + }) + } + // But we can't batch a settings and a doc op with another doc op + // this MUST be AFTER the two previous branch + ( + this @ BatchKind::SettingsAndDocumentImport { .. }, + K::DocumentDeletion | K::DocumentImport { .. }, + ) => Break(this), + ( + BatchKind::SettingsAndDocumentImport { mut settings_ids, method, allow_index_creation, import_ids }, + K::Settings { .. }, + ) => { + settings_ids.push(id); + Continue(BatchKind::SettingsAndDocumentImport { + settings_ids, + method, + allow_index_creation, + import_ids, + }) + } + ( + BatchKind::IndexCreation { .. } + | BatchKind::IndexDeletion { .. } + | BatchKind::IndexUpdate { .. } + | BatchKind::IndexSwap { .. }, + _, + ) => { + unreachable!() + } + } + } +} + +/// Create a batch from an ordered list of tasks. +/// +/// ## Preconditions +/// 1. The tasks must be enqueued and given in the order in which they were enqueued +/// 2. The tasks must not be prioritised tasks (e.g. task cancellation, dump, snapshot, task deletion) +/// 3. The tasks must all be related to the same index +/// +/// ## Return +/// `None` if the list of tasks is empty. Otherwise, an [`AutoBatch`] that represents +/// a subset of the given tasks. +pub fn autobatch( + enqueued: Vec<(TaskId, KindWithContent)>, + index_already_exists: bool, +) -> Option<(BatchKind, bool)> { + let mut enqueued = enqueued.into_iter(); + let (id, kind) = enqueued.next()?; + + // index_exist will keep track of if the index should exist at this point after the tasks we batched. + let mut index_exist = index_already_exists; + + let (mut acc, must_create_index) = match BatchKind::new(id, kind) { + (Continue(acc), create) => (acc, create), + (Break(acc), create) => return Some((acc, create)), + }; + + // if an index has been created in the previous step we can consider it as existing. 
+ index_exist |= must_create_index; + + for (id, kind) in enqueued { + acc = match acc.accumulate(id, kind.into(), index_exist) { + Continue(acc) => acc, + Break(acc) => return Some((acc, must_create_index)), + }; + } + + Some((acc, must_create_index)) +} + +#[cfg(test)] +mod tests { + use meilisearch_types::tasks::IndexSwap; + use uuid::Uuid; + + use super::*; + use crate::debug_snapshot; + + fn autobatch_from( + index_already_exists: bool, + input: impl IntoIterator, + ) -> Option<(BatchKind, bool)> { + autobatch( + input.into_iter().enumerate().map(|(id, kind)| (id as TaskId, kind)).collect(), + index_already_exists, + ) + } + + fn doc_imp(method: IndexDocumentsMethod, allow_index_creation: bool) -> KindWithContent { + KindWithContent::DocumentAdditionOrUpdate { + index_uid: String::from("doggo"), + primary_key: None, + method, + content_file: Uuid::new_v4(), + documents_count: 0, + allow_index_creation, + } + } + + fn doc_del() -> KindWithContent { + KindWithContent::DocumentDeletion { + index_uid: String::from("doggo"), + documents_ids: Vec::new(), + } + } + + fn doc_clr() -> KindWithContent { + KindWithContent::DocumentClear { index_uid: String::from("doggo") } + } + + fn settings(allow_index_creation: bool) -> KindWithContent { + KindWithContent::SettingsUpdate { + index_uid: String::from("doggo"), + new_settings: Default::default(), + is_deletion: false, + allow_index_creation, + } + } + + fn idx_create() -> KindWithContent { + KindWithContent::IndexCreation { index_uid: String::from("doggo"), primary_key: None } + } + + fn idx_update() -> KindWithContent { + KindWithContent::IndexUpdate { index_uid: String::from("doggo"), primary_key: None } + } + + fn idx_del() -> KindWithContent { + KindWithContent::IndexDeletion { index_uid: String::from("doggo") } + } + + fn idx_swap() -> KindWithContent { + KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: (String::from("doggo"), String::from("catto")) }], + } + } + + #[test] + fn autobatch_simple_operation_together() { + // we can autobatch one or multiple `ReplaceDocuments` together. + // if the index exists. + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), doc_imp( ReplaceDocuments, false ), doc_imp(ReplaceDocuments, false )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1, 2] }, false))"); + + // if it doesn't exists. 
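The `autobatch` loop above folds each enqueued task into the growing batch until `accumulate` returns `Break`, at which point the remaining tasks are left for a later batch. A standalone sketch of that `ControlFlow` folding pattern, with a made-up "batch" that is just a list of ids and a flag standing in for a task kind that closes the batch:

```rust
use std::ops::ControlFlow::{self, Break, Continue};

// Hypothetical stand-in for `BatchKind::accumulate`: push the task into the
// batch, and decide whether the batch may keep growing.
fn accumulate(mut batch: Vec<u32>, id: u32, closes_batch: bool) -> ControlFlow<Vec<u32>, Vec<u32>> {
    batch.push(id);
    if closes_batch {
        Break(batch) // the batch must stop growing here
    } else {
        Continue(batch) // keep folding the next enqueued task in
    }
}

fn main() {
    // (task id, does this task close the batch?)
    let enqueued = [(0_u32, false), (1, false), (2, true), (3, false)];

    let mut acc = Vec::new();
    for (id, closes) in enqueued {
        match accumulate(acc, id, closes) {
            Continue(batch) => acc = batch,
            Break(batch) => {
                acc = batch;
                break; // task 3 is left for a later batch
            }
        }
    }

    assert_eq!(acc, vec![0, 1, 2]);
    println!("batched tasks: {acc:?}");
}
```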
+ debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), doc_imp( ReplaceDocuments, true ), doc_imp(ReplaceDocuments, true )]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + + // we can autobatch one or multiple `UpdateDocuments` together. + // if the index exists. + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0, 1, 2] }, false))"); + + // if it doesn't exists. 
+ debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false), doc_imp(UpdateDocuments, false)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0, 1, 2] }, false))"); + + // we can autobatch one or multiple DocumentDeletion together + debug_snapshot!(autobatch_from(true, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))"); + + // we can autobatch one or multiple Settings together + debug_snapshot!(autobatch_from(true, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); + + debug_snapshot!(autobatch_from(false, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); + } + + #[test] + fn simple_document_operation_dont_autobatch_with_other() { + // addition, updates and deletion can't batch together + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(UpdateDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + 
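The `debug_snapshot!` assertions used throughout these tests follow insta's inline-snapshot style (insta is listed in the dev-dependencies earlier in this diff): the string after `@` is the expected rendering of the value, and can be rewritten in place with `cargo insta review` when the behaviour changes. A minimal, self-contained illustration of that style, unrelated to the autobatcher itself:

```rust
#[cfg(test)]
mod inline_snapshot_example {
    #[test]
    fn snapshots_are_plain_strings() {
        // Compares against the Debug representation of the value.
        insta::assert_debug_snapshot!(2 + 2, @"4");
        // Compares against a plain string.
        insta::assert_snapshot!(format!("{}-{}", "doggo", 0), @"doggo-0");
    }
}
```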
debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_del(), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_del(), doc_imp(UpdateDocuments, true)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_create()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_create()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_update()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_update()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_swap()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_swap()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + } + + #[test] + fn document_addition_batch_with_settings() { + // simple case + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + + // multiple settings and doc addition + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))"); + + // addition and setting unordered + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, 
[doc_imp(UpdateDocuments, true), settings(true), doc_imp(UpdateDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, import_ids: [0, 2] }, true))"); + + // We ensure this kind of batch doesn't batch with forbidden operations + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_imp(UpdateDocuments, true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_imp(ReplaceDocuments, true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + } + + #[test] + fn clear_and_additions() { + // these two doesn't need to batch + debug_snapshot!(autobatch_from(true, [doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentClear { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some((DocumentClear { ids: [0] }, false))"); + + // Basic use case + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + + // This batch kind doesn't mix with other document addition + 
debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_imp(UpdateDocuments, true)]), @"Some((DocumentClear { ids: [0, 1, 2] }, true))"); + + // But you can batch multiple clear together + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), doc_imp(UpdateDocuments, true), doc_clr(), doc_clr(), doc_clr()]), @"Some((DocumentClear { ids: [0, 1, 2, 3, 4] }, true))"); + } + + #[test] + fn clear_and_additions_and_settings() { + // A clear don't need to autobatch the settings that happens AFTER there is no documents + debug_snapshot!(autobatch_from(true, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(true, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))"); + } + + #[test] + fn anything_and_index_deletion() { + // The `IndexDeletion` doesn't batch with anything that happens AFTER. 
+ debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(ReplaceDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(UpdateDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(ReplaceDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_imp(UpdateDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(ReplaceDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(UpdateDocuments, true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(ReplaceDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_imp(UpdateDocuments, false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); + + // The index deletion can accept almost any type of `BatchKind` and transform it to an `IndexDeletion`. 
+ // First, the basic cases + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + + // Then the mixed cases. + // The index already exists, whatever is the right of the tasks it shouldn't change the result. 
+ debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,false), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, false), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments,true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(UpdateDocuments, true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + + // When the index doesn't exists yet it's more complicated. + // Either the first task we encounter create it, in which case we can create a big batch with everything. + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + // The right of the tasks following isn't really important. 
+ debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, true), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); + // Or, the second case; the first task doesn't create the index and thus we wants to batch it with only tasks that can't create an index. + // that can be a second task that don't have the right to create an index. Or anything that can't create an index like an index deletion, document deletion, document clear, etc. + // All theses tasks are going to throw an error `Index doesn't exist` once the batch is processed. + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); + // The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whith what + // follows because we first need to process the erronous batch. + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(true), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(true), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments,false), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(UpdateDocuments, false), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + } + + #[test] + fn allowed_and_disallowed_index_creation() { + // `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists. 
+ debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, [doc_imp(ReplaceDocuments, false), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), doc_imp(ReplaceDocuments, true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, import_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), doc_imp(ReplaceDocuments, false)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, import_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, [doc_imp(ReplaceDocuments, false), settings(true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, import_ids: [0] }, false))"); + } +} diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs new file mode 100644 index 000000000..02cfdb178 --- /dev/null +++ b/index-scheduler/src/batch.rs @@ -0,0 +1,1286 @@ +/*! +This module handles the creation and processing of batch operations. + +A batch is a combination of multiple tasks that can be processed at once. +Executing a batch operation should always be functionally equivalent to +executing each of its tasks' operations individually and in order. + +For example, if the user sends two tasks: +1. import documents X +2. import documents Y + +We can combine the two tasks in a single batch: +1. import documents X and Y + +Processing this batch is functionally equivalent to processing the two +tasks individally, but should be much faster since we are only performing +one indexing operation. 
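The equivalence property stated in this module doc can be illustrated in miniature: replacing documents one payload at a time must give the same end state as replacing them with the two payloads combined into one batch. The sketch below uses a toy map-based "index" as a stand-in, not Meilisearch's actual data model.

```rust
use std::collections::HashMap;

// Toy "index": document id -> document body.
type Index = HashMap<u32, String>;

// Toy "replace documents" task: last write wins for a given id.
fn import(index: &mut Index, docs: &[(u32, &str)]) {
    for (id, body) in docs {
        index.insert(*id, body.to_string());
    }
}

fn main() {
    let x = [(1, "doggo"), (2, "catto")];
    let y = [(2, "catto v2"), (3, "hamster")];

    // Two tasks processed one after the other...
    let mut sequential = Index::new();
    import(&mut sequential, &x);
    import(&mut sequential, &y);

    // ...must give the same result as one batch containing both payloads.
    let mut batched = Index::new();
    let combined: Vec<_> = x.iter().chain(y.iter()).copied().collect();
    import(&mut batched, &combined);

    assert_eq!(sequential, batched);
    println!("batched and sequential processing agree");
}
```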
+*/ + +use std::collections::{BTreeSet, HashSet}; +use std::ffi::OsStr; +use std::fs::{self, File}; +use std::io::BufWriter; + +use dump::IndexMetadata; +use log::{debug, error, info}; +use meilisearch_types::heed::{RoTxn, RwTxn}; +use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; +use meilisearch_types::milli::heed::CompactionOption; +use meilisearch_types::milli::update::{ + DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, + Settings as MilliSettings, +}; +use meilisearch_types::milli::{self, BEU32}; +use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; +use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; +use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; +use roaring::RoaringBitmap; +use time::macros::format_description; +use time::OffsetDateTime; +use uuid::Uuid; + +use crate::autobatcher::{self, BatchKind}; +use crate::utils::{self, swap_index_uid_in_task}; +use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId}; + +/// Represents a combination of tasks that can all be processed at the same time. +/// +/// A batch contains the set of tasks that it represents (accessible through +/// [`self.ids()`](Batch::ids)), as well as additional information on how to +/// be processed. +#[derive(Debug)] +pub(crate) enum Batch { + TaskCancelation { + /// The task cancelation itself. + task: Task, + /// The date and time at which the previously processing tasks started. + previous_started_at: OffsetDateTime, + /// The list of tasks that were processing when this task cancelation appeared. + previous_processing_tasks: RoaringBitmap, + }, + TaskDeletion(Task), + SnapshotCreation(Vec), + Dump(Task), + IndexOperation { + op: IndexOperation, + must_create_index: bool, + }, + IndexCreation { + index_uid: String, + primary_key: Option, + task: Task, + }, + IndexUpdate { + index_uid: String, + primary_key: Option, + task: Task, + }, + IndexDeletion { + index_uid: String, + tasks: Vec, + index_has_been_created: bool, + }, + IndexSwap { + task: Task, + }, +} + +/// A [batch](Batch) that combines multiple tasks operating on an index. +#[derive(Debug)] +pub(crate) enum IndexOperation { + DocumentImport { + index_uid: String, + primary_key: Option, + method: IndexDocumentsMethod, + documents_counts: Vec, + content_files: Vec, + tasks: Vec, + }, + DocumentDeletion { + index_uid: String, + // The vec associated with each document deletion tasks. + documents: Vec>, + tasks: Vec, + }, + DocumentClear { + index_uid: String, + tasks: Vec, + }, + Settings { + index_uid: String, + // The boolean indicates if it's a settings deletion or creation. + settings: Vec<(bool, Settings)>, + tasks: Vec, + }, + DocumentClearAndSetting { + index_uid: String, + cleared_tasks: Vec, + + // The boolean indicates if it's a settings deletion or creation. + settings: Vec<(bool, Settings)>, + settings_tasks: Vec, + }, + SettingsAndDocumentImport { + index_uid: String, + + primary_key: Option, + method: IndexDocumentsMethod, + documents_counts: Vec, + content_files: Vec, + document_import_tasks: Vec, + + // The boolean indicates if it's a settings deletion or creation. + settings: Vec<(bool, Settings)>, + settings_tasks: Vec, + }, +} + +impl Batch { + /// Return the task ids associated with this batch. + pub fn ids(&self) -> Vec { + match self { + Batch::TaskCancelation { task, .. } + | Batch::TaskDeletion(task) + | Batch::Dump(task) + | Batch::IndexCreation { task, .. 
} + | Batch::IndexUpdate { task, .. } => vec![task.uid], + Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => { + tasks.iter().map(|task| task.uid).collect() + } + Batch::IndexOperation { op, .. } => match op { + IndexOperation::DocumentImport { tasks, .. } + | IndexOperation::DocumentDeletion { tasks, .. } + | IndexOperation::Settings { tasks, .. } + | IndexOperation::DocumentClear { tasks, .. } => { + tasks.iter().map(|task| task.uid).collect() + } + IndexOperation::SettingsAndDocumentImport { + document_import_tasks: tasks, + settings_tasks: other, + .. + } + | IndexOperation::DocumentClearAndSetting { + cleared_tasks: tasks, + settings_tasks: other, + .. + } => tasks.iter().chain(other).map(|task| task.uid).collect(), + }, + Batch::IndexSwap { task } => vec![task.uid], + } + } +} + +impl IndexOperation { + pub fn index_uid(&self) -> &str { + match self { + IndexOperation::DocumentImport { index_uid, .. } + | IndexOperation::DocumentDeletion { index_uid, .. } + | IndexOperation::DocumentClear { index_uid, .. } + | IndexOperation::Settings { index_uid, .. } + | IndexOperation::DocumentClearAndSetting { index_uid, .. } + | IndexOperation::SettingsAndDocumentImport { index_uid, .. } => index_uid, + } + } +} + +impl IndexScheduler { + /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`]. + /// + /// ## Arguments + /// - `rtxn`: read transaction + /// - `index_uid`: name of the index affected by the operations of the autobatch + /// - `batch`: the result of the autobatcher + pub(crate) fn create_next_batch_index( + &self, + rtxn: &RoTxn, + index_uid: String, + batch: BatchKind, + must_create_index: bool, + ) -> Result> { + match batch { + BatchKind::DocumentClear { ids } => Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentClear { + tasks: self.get_existing_tasks(rtxn, ids)?, + index_uid, + }, + must_create_index, + })), + BatchKind::DocumentImport { method, import_ids, .. } => { + let tasks = self.get_existing_tasks(rtxn, import_ids)?; + let primary_key = match &tasks[0].kind { + KindWithContent::DocumentAdditionOrUpdate { primary_key, .. } => { + primary_key.clone() + } + _ => unreachable!(), + }; + + let mut documents_counts = Vec::new(); + let mut content_files = Vec::new(); + for task in &tasks { + match task.kind { + KindWithContent::DocumentAdditionOrUpdate { + content_file, + documents_count, + .. + } => { + documents_counts.push(documents_count); + content_files.push(content_file); + } + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentImport { + index_uid, + primary_key, + method, + documents_counts, + content_files, + tasks, + }, + must_create_index, + })) + } + BatchKind::DocumentDeletion { deletion_ids } => { + let tasks = self.get_existing_tasks(rtxn, deletion_ids)?; + + let mut documents = Vec::new(); + for task in &tasks { + match task.kind { + KindWithContent::DocumentDeletion { ref documents_ids, .. } => { + documents.push(documents_ids.clone()) + } + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentDeletion { index_uid, documents, tasks }, + must_create_index, + })) + } + BatchKind::Settings { settings_ids, .. } => { + let tasks = self.get_existing_tasks(rtxn, settings_ids)?; + + let mut settings = Vec::new(); + for task in &tasks { + match task.kind { + KindWithContent::SettingsUpdate { + ref new_settings, is_deletion, .. 
+ } => settings.push((is_deletion, *new_settings.clone())), + _ => unreachable!(), + } + } + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::Settings { index_uid, settings, tasks }, + must_create_index, + })) + } + BatchKind::ClearAndSettings { other, settings_ids, allow_index_creation } => { + let (index_uid, settings, settings_tasks) = match self + .create_next_batch_index( + rtxn, + index_uid, + BatchKind::Settings { settings_ids, allow_index_creation }, + must_create_index, + )? + .unwrap() + { + Batch::IndexOperation { + op: IndexOperation::Settings { index_uid, settings, tasks, .. }, + .. + } => (index_uid, settings, tasks), + _ => unreachable!(), + }; + let (index_uid, cleared_tasks) = match self + .create_next_batch_index( + rtxn, + index_uid, + BatchKind::DocumentClear { ids: other }, + must_create_index, + )? + .unwrap() + { + Batch::IndexOperation { + op: IndexOperation::DocumentClear { index_uid, tasks }, + .. + } => (index_uid, tasks), + _ => unreachable!(), + }; + + Ok(Some(Batch::IndexOperation { + op: IndexOperation::DocumentClearAndSetting { + index_uid, + cleared_tasks, + settings, + settings_tasks, + }, + must_create_index, + })) + } + BatchKind::SettingsAndDocumentImport { + settings_ids, + method, + allow_index_creation, + import_ids, + } => { + let settings = self.create_next_batch_index( + rtxn, + index_uid.clone(), + BatchKind::Settings { settings_ids, allow_index_creation }, + must_create_index, + )?; + + let document_import = self.create_next_batch_index( + rtxn, + index_uid.clone(), + BatchKind::DocumentImport { method, allow_index_creation, import_ids }, + must_create_index, + )?; + + match (document_import, settings) { + ( + Some(Batch::IndexOperation { + op: + IndexOperation::DocumentImport { + primary_key, + documents_counts, + content_files, + tasks: document_import_tasks, + .. + }, + .. + }), + Some(Batch::IndexOperation { + op: IndexOperation::Settings { settings, tasks: settings_tasks, .. }, + .. + }), + ) => Ok(Some(Batch::IndexOperation { + op: IndexOperation::SettingsAndDocumentImport { + index_uid, + primary_key, + method, + documents_counts, + content_files, + document_import_tasks, + settings, + settings_tasks, + }, + must_create_index, + })), + _ => unreachable!(), + } + } + BatchKind::IndexCreation { id } => { + let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + let (index_uid, primary_key) = match &task.kind { + KindWithContent::IndexCreation { index_uid, primary_key } => { + (index_uid.clone(), primary_key.clone()) + } + _ => unreachable!(), + }; + Ok(Some(Batch::IndexCreation { index_uid, primary_key, task })) + } + BatchKind::IndexUpdate { id } => { + let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + let primary_key = match &task.kind { + KindWithContent::IndexUpdate { primary_key, .. } => primary_key.clone(), + _ => unreachable!(), + }; + Ok(Some(Batch::IndexUpdate { index_uid, primary_key, task })) + } + BatchKind::IndexDeletion { ids } => Ok(Some(Batch::IndexDeletion { + index_uid, + index_has_been_created: must_create_index, + tasks: self.get_existing_tasks(rtxn, ids)?, + })), + BatchKind::IndexSwap { id } => { + let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; + Ok(Some(Batch::IndexSwap { task })) + } + } + } + + /// Create the next batch to be processed; + /// 1. We get the *last* task to cancel. + /// 2. We get the *next* task to delete. + /// 3. We get the *next* snapshot to process. + /// 4. We get the *next* dump to process. + /// 5. 
We get the *next* tasks to process for a specific index. + pub(crate) fn create_next_batch(&self, rtxn: &RoTxn) -> Result> { + #[cfg(test)] + self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; + + let enqueued = &self.get_status(rtxn, Status::Enqueued)?; + let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued; + + // 1. we get the last task to cancel. + if let Some(task_id) = to_cancel.max() { + // We retrieve the tasks that were processing before this tasks cancelation started. + // We must *not* reset the processing tasks before calling this method. + let ProcessingTasks { started_at, processing } = + &*self.processing_tasks.read().unwrap(); + return Ok(Some(Batch::TaskCancelation { + task: self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?, + previous_started_at: *started_at, + previous_processing_tasks: processing.clone(), + })); + } + + // 2. we get the next task to delete + let to_delete = self.get_kind(rtxn, Kind::TaskDeletion)? & enqueued; + if let Some(task_id) = to_delete.min() { + let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + return Ok(Some(Batch::TaskDeletion(task))); + } + + // 3. we batch the snapshot. + let to_snapshot = self.get_kind(rtxn, Kind::SnapshotCreation)? & enqueued; + if !to_snapshot.is_empty() { + return Ok(Some(Batch::SnapshotCreation(self.get_existing_tasks(rtxn, to_snapshot)?))); + } + + // 4. we batch the dumps. + let to_dump = self.get_kind(rtxn, Kind::DumpCreation)? & enqueued; + if let Some(to_dump) = to_dump.min() { + return Ok(Some(Batch::Dump( + self.get_task(rtxn, to_dump)?.ok_or(Error::CorruptedTaskQueue)?, + ))); + } + + // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. + let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; + let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + // If the task is not associated with any index, verify that it is an index swap and + // create the batch directly. Otherwise, get the index name associated with the task + // and use the autobatcher to batch the enqueued tasks associated with it + + let index_name = if let Some(&index_name) = task.indexes().first() { + index_name + } else { + assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())); + return Ok(Some(Batch::IndexSwap { task })); + }; + + let index_already_exists = self.index_mapper.exists(rtxn, index_name)?; + + let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued; + + // If autobatching is disabled we only take one task at a time. + let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 }; + + let enqueued = index_tasks + .into_iter() + .take(tasks_limit) + .map(|task_id| { + self.get_task(rtxn, task_id) + .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) + .map(|task| (task.uid, task.kind)) + }) + .collect::>>()?; + + if let Some((batchkind, create_index)) = + autobatcher::autobatch(enqueued, index_already_exists) + { + return self.create_next_batch_index( + rtxn, + index_name.to_string(), + batchkind, + create_index, + ); + } + + // If we found no tasks then we were notified for something that got autobatched + // somehow and there is nothing to do. + Ok(None) + } + + /// Apply the operation associated with the given batch. + /// + /// ## Return + /// The list of tasks that were processed. 
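The prioritisation implemented by `create_next_batch` above (last cancelation, then oldest deletion, then all snapshots, then oldest dump, then the tasks of one index) can be sketched in isolation as a chain of early returns. The types below are hypothetical stand-ins for the scheduler's real task ids and batches.

```rust
#[derive(Debug)]
enum NextBatch {
    TaskCancelation(u32),
    TaskDeletion(u32),
    Snapshot(Vec<u32>),
    Dump(u32),
    IndexTasks(Vec<u32>),
}

fn pick_next_batch(
    cancelations: &[u32],
    deletions: &[u32],
    snapshots: &[u32],
    dumps: &[u32],
    index_tasks: &[u32],
) -> Option<NextBatch> {
    // 1. the most recently enqueued cancelation wins
    if let Some(&id) = cancelations.last() {
        return Some(NextBatch::TaskCancelation(id));
    }
    // 2. then the oldest task deletion
    if let Some(&id) = deletions.first() {
        return Some(NextBatch::TaskDeletion(id));
    }
    // 3. then every enqueued snapshot, batched together
    if !snapshots.is_empty() {
        return Some(NextBatch::Snapshot(snapshots.to_vec()));
    }
    // 4. then the oldest dump
    if let Some(&id) = dumps.first() {
        return Some(NextBatch::Dump(id));
    }
    // 5. finally the enqueued tasks of a single index (autobatched in the real code)
    if !index_tasks.is_empty() {
        return Some(NextBatch::IndexTasks(index_tasks.to_vec()));
    }
    None
}

fn main() {
    let next = pick_next_batch(&[], &[7], &[], &[3], &[1, 2]);
    println!("{next:?}"); // Some(TaskDeletion(7))
}
```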
The metadata of each task in the returned + /// list is updated accordingly, with the exception of the its date fields + /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). + pub(crate) fn process_batch(&self, batch: Batch) -> Result> { + #[cfg(test)] + { + self.maybe_fail(crate::tests::FailureLocation::InsideProcessBatch)?; + self.maybe_fail(crate::tests::FailureLocation::PanicInsideProcessBatch)?; + self.breakpoint(crate::Breakpoint::InsideProcessBatch); + } + match batch { + Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => { + // 1. Retrieve the tasks that matched the query at enqueue-time. + let matched_tasks = + if let KindWithContent::TaskCancelation { tasks, query: _ } = &task.kind { + tasks + } else { + unreachable!() + }; + + let mut wtxn = self.env.write_txn()?; + let canceled_tasks_content_uuids = self.cancel_matched_tasks( + &mut wtxn, + task.uid, + matched_tasks, + previous_started_at, + &previous_processing_tasks, + )?; + + task.status = Status::Succeeded; + match &mut task.details { + Some(Details::TaskCancelation { + matched_tasks: _, + canceled_tasks, + original_filter: _, + }) => { + *canceled_tasks = Some(canceled_tasks_content_uuids.len() as u64); + } + _ => unreachable!(), + } + + // We must only remove the content files if the transaction is successfully committed + // and if errors occurs when we are deleting files we must do our best to delete + // everything. We do not return the encountered errors when deleting the content + // files as it is not a breaking operation and we can safely continue our job. + match wtxn.commit() { + Ok(()) => { + for content_uuid in canceled_tasks_content_uuids { + if let Err(error) = self.delete_update_file(content_uuid) { + error!( + "We failed deleting the content file indentified as {}: {}", + content_uuid, error + ) + } + } + } + Err(e) => return Err(e.into()), + } + + Ok(vec![task]) + } + Batch::TaskDeletion(mut task) => { + // 1. Retrieve the tasks that matched the query at enqueue-time. + let matched_tasks = + if let KindWithContent::TaskDeletion { tasks, query: _ } = &task.kind { + tasks + } else { + unreachable!() + }; + + let mut wtxn = self.env.write_txn()?; + let deleted_tasks_count = self.delete_matched_tasks(&mut wtxn, matched_tasks)?; + + task.status = Status::Succeeded; + match &mut task.details { + Some(Details::TaskDeletion { + matched_tasks: _, + deleted_tasks, + original_filter: _, + }) => { + *deleted_tasks = Some(deleted_tasks_count); + } + _ => unreachable!(), + } + wtxn.commit()?; + Ok(vec![task]) + } + Batch::SnapshotCreation(mut tasks) => { + fs::create_dir_all(&self.snapshots_path)?; + let temp_snapshot_dir = tempfile::tempdir()?; + + // 1. Snapshot the version file. + let dst = temp_snapshot_dir.path().join(VERSION_FILE_NAME); + fs::copy(&self.version_file_path, dst)?; + + // 2. Snapshot the index-scheduler LMDB env + // + // When we call copy_to_path, LMDB opens a read transaction by itself, + // we can't provide our own. It is an issue as we would like to know + // the update files to copy but new ones can be enqueued between the copy + // of the env and the new transaction we open to retrieve the enqueued tasks. + // So we prefer opening a new transaction after copying the env and copy more + // update files than not enough. + // + // Note that there cannot be any update files deleted between those + // two read operations as the task processing is synchronous. 
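+                //
+                // For orientation, the temporary snapshot directory built by the steps
+                // below ends up looking roughly like this (a sketch only; the names come
+                // from the paths used in the code below):
+                //
+                //   <temp_snapshot_dir>/
+                //     VERSION_FILE_NAME            (copy of the version file)
+                //     tasks/data.mdb               (copy of the index-scheduler LMDB env)
+                //     update_files/<content uuid>  (one file per *enqueued* task, if any)
+                //     indexes/<index uuid>/data.mdb
+                //     auth/data.mdb
+                //
+                // The whole directory is then tarballed into `<db_name>.snapshot` in step 5.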
+ + // 2.1 First copy the LMDB env of the index-scheduler + let dst = temp_snapshot_dir.path().join("tasks"); + fs::create_dir_all(&dst)?; + self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; + + // 2.2 Create a read transaction on the index-scheduler + let rtxn = self.env.read_txn()?; + + // 2.3 Create the update files directory + let update_files_dir = temp_snapshot_dir.path().join("update_files"); + fs::create_dir_all(&update_files_dir)?; + + // 2.4 Only copy the update files of the enqueued tasks + for task_id in self.get_status(&rtxn, Status::Enqueued)? { + let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + if let Some(content_uuid) = task.content_uuid() { + let src = self.file_store.get_update_path(content_uuid); + let dst = update_files_dir.join(content_uuid.to_string()); + fs::copy(src, dst)?; + } + } + + // 3. Snapshot every indexes + // TODO we are opening all of the indexes it can be too much we should unload all + // of the indexes we are trying to open. It would be even better to only unload + // the ones that were opened by us. Or maybe use a LRU in the index mapper. + for result in self.index_mapper.index_mapping.iter(&rtxn)? { + let (name, uuid) = result?; + let index = self.index_mapper.index(&rtxn, name)?; + let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); + fs::create_dir_all(&dst)?; + index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; + } + + drop(rtxn); + + // 4. Snapshot the auth LMDB env + let dst = temp_snapshot_dir.path().join("auth"); + fs::create_dir_all(&dst)?; + // TODO We can't use the open_auth_store_env function here but we should + let auth = milli::heed::EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) // 1 GiB + .max_dbs(2) + .open(&self.auth_path)?; + auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?; + + // 5. Copy and tarball the flat snapshot + // 5.1 Find the original name of the database + // TODO find a better way to get this path + let mut base_path = self.env.path().to_owned(); + base_path.pop(); + let db_name = base_path.file_name().and_then(OsStr::to_str).unwrap_or("data.ms"); + + // 5.2 Tarball the content of the snapshot in a tempfile with a .snapshot extension + let snapshot_path = self.snapshots_path.join(format!("{}.snapshot", db_name)); + let temp_snapshot_file = tempfile::NamedTempFile::new_in(&self.snapshots_path)?; + compression::to_tar_gz(temp_snapshot_dir.path(), temp_snapshot_file.path())?; + let file = temp_snapshot_file.persist(&snapshot_path)?; + + // 5.3 Change the permission to make the snapshot readonly + let mut permissions = file.metadata()?.permissions(); + permissions.set_readonly(true); + file.set_permissions(permissions)?; + + for task in &mut tasks { + task.status = Status::Succeeded; + } + + Ok(tasks) + } + Batch::Dump(mut task) => { + let started_at = OffsetDateTime::now_utc(); + let (keys, instance_uid) = + if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind { + (keys, instance_uid) + } else { + unreachable!(); + }; + let dump = dump::DumpWriter::new(*instance_uid)?; + + // 1. dump the keys + let mut dump_keys = dump.create_keys()?; + for key in keys { + dump_keys.push_key(key)?; + } + dump_keys.flush()?; + + let rtxn = self.env.read_txn()?; + + // 2. dump the tasks + let mut dump_tasks = dump.create_tasks_queue()?; + for ret in self.all_tasks.iter(&rtxn)? 
{ + let (_, mut t) = ret?; + let status = t.status; + let content_file = t.content_uuid(); + + // In the case we're dumping ourselves we want to be marked as finished + // to not loop over ourselves indefinitely. + if t.uid == task.uid { + let finished_at = OffsetDateTime::now_utc(); + + // We're going to fake the date because we don't know if everything is going to go well. + // But we need to dump the task as finished and successful. + // If something fail everything will be set appropriately in the end. + t.status = Status::Succeeded; + t.started_at = Some(started_at); + t.finished_at = Some(finished_at); + } + let mut dump_content_file = dump_tasks.push_task(&t.into())?; + + // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet. + if let Some(content_file) = content_file { + if status == Status::Enqueued { + let content_file = self.file_store.get_update(content_file)?; + + let reader = DocumentsBatchReader::from_reader(content_file) + .map_err(milli::Error::from)?; + + let (mut cursor, documents_batch_index) = + reader.into_cursor_and_fields_index(); + + while let Some(doc) = + cursor.next_document().map_err(milli::Error::from)? + { + dump_content_file.push_document(&obkv_to_object( + &doc, + &documents_batch_index, + )?)?; + } + dump_content_file.flush()?; + } + } + } + dump_tasks.flush()?; + + // 3. Dump the indexes + for (uid, index) in self.index_mapper.indexes(&rtxn)? { + let rtxn = index.read_txn()?; + let metadata = IndexMetadata { + uid: uid.clone(), + primary_key: index.primary_key(&rtxn)?.map(String::from), + created_at: index.created_at(&rtxn)?, + updated_at: index.updated_at(&rtxn)?, + }; + let mut index_dumper = dump.create_index(&uid, &metadata)?; + + let fields_ids_map = index.fields_ids_map(&rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + + // 3.1. Dump the documents + for ret in index.all_documents(&rtxn)? { + let (_id, doc) = ret?; + let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?; + index_dumper.push_document(&document)?; + } + + // 3.2. Dump the settings + let settings = meilisearch_types::settings::settings(&index, &rtxn)?; + index_dumper.settings(&settings)?; + } + + let dump_uid = started_at.format(format_description!( + "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" + )).unwrap(); + + let path = self.dumps_path.join(format!("{}.dump", dump_uid)); + let file = File::create(path)?; + dump.persist_to(BufWriter::new(file))?; + + // if we reached this step we can tell the scheduler we succeeded to dump ourselves. + task.status = Status::Succeeded; + task.details = Some(Details::Dump { dump_uid: Some(dump_uid) }); + Ok(vec![task]) + } + Batch::IndexOperation { op, must_create_index } => { + let index_uid = op.index_uid(); + let index = if must_create_index { + // create the index if it doesn't already exist + let wtxn = self.env.write_txn()?; + self.index_mapper.create_index(wtxn, index_uid)? + } else { + let rtxn = self.env.read_txn()?; + self.index_mapper.index(&rtxn, index_uid)? + }; + + let mut index_wtxn = index.write_txn()?; + let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?; + index_wtxn.commit()?; + + Ok(tasks) + } + Batch::IndexCreation { index_uid, primary_key, task } => { + let wtxn = self.env.write_txn()?; + if self.index_mapper.exists(&wtxn, &index_uid)? 
{ + return Err(Error::IndexAlreadyExists(index_uid)); + } + self.index_mapper.create_index(wtxn, &index_uid)?; + + self.process_batch(Batch::IndexUpdate { index_uid, primary_key, task }) + } + Batch::IndexUpdate { index_uid, primary_key, mut task } => { + let rtxn = self.env.read_txn()?; + let index = self.index_mapper.index(&rtxn, &index_uid)?; + + if let Some(primary_key) = primary_key.clone() { + let mut index_wtxn = index.write_txn()?; + let mut builder = MilliSettings::new( + &mut index_wtxn, + &index, + self.index_mapper.indexer_config(), + ); + builder.set_primary_key(primary_key); + let must_stop_processing = self.must_stop_processing.clone(); + builder.execute( + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.get(), + )?; + index_wtxn.commit()?; + } + task.status = Status::Succeeded; + task.details = Some(Details::IndexInfo { primary_key }); + + Ok(vec![task]) + } + Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { + let wtxn = self.env.write_txn()?; + + // it's possible that the index doesn't exist + let number_of_documents = || -> Result { + let index = self.index_mapper.index(&wtxn, &index_uid)?; + let index_rtxn = index.read_txn()?; + Ok(index.number_of_documents(&index_rtxn)?) + }() + .unwrap_or_default(); + + // The write transaction is directly owned and commited inside. + match self.index_mapper.delete_index(wtxn, &index_uid) { + Ok(()) => (), + Err(Error::IndexNotFound(_)) if index_has_been_created => (), + Err(e) => return Err(e), + } + + // We set all the tasks details to the default value. + for task in &mut tasks { + task.status = Status::Succeeded; + task.details = match &task.kind { + KindWithContent::IndexDeletion { .. } => { + Some(Details::ClearAll { deleted_documents: Some(number_of_documents) }) + } + otherwise => otherwise.default_finished_details(), + }; + } + + Ok(tasks) + } + Batch::IndexSwap { mut task } => { + let mut wtxn = self.env.write_txn()?; + let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind { + swaps + } else { + unreachable!() + }; + let mut not_found_indexes = BTreeSet::new(); + for IndexSwap { indexes: (lhs, rhs) } in swaps { + for index in [lhs, rhs] { + let index_exists = self.index_mapper.index_exists(&wtxn, index)?; + if !index_exists { + not_found_indexes.insert(index); + } + } + } + if !not_found_indexes.is_empty() { + if not_found_indexes.len() == 1 { + return Err(Error::IndexNotFound( + not_found_indexes.into_iter().next().unwrap().clone(), + )); + } else { + return Err(Error::IndexesNotFound( + not_found_indexes.into_iter().cloned().collect(), + )); + } + } + for swap in swaps { + self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?; + } + wtxn.commit()?; + task.status = Status::Succeeded; + Ok(vec![task]) + } + } + } + + /// Swap the index `lhs` with the index `rhs`. + fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> { + // 1. Verify that both lhs and rhs are existing indexes + let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?; + if !index_lhs_exists { + return Err(Error::IndexNotFound(lhs.to_owned())); + } + let index_rhs_exists = self.index_mapper.index_exists(wtxn, rhs)?; + if !index_rhs_exists { + return Err(Error::IndexNotFound(rhs.to_owned())); + } + + // 2. 
Get the task set for index = name that appeared before the index swap task
+        let mut index_lhs_task_ids = self.index_tasks(wtxn, lhs)?;
+        index_lhs_task_ids.remove_range(task_id..);
+        let mut index_rhs_task_ids = self.index_tasks(wtxn, rhs)?;
+        index_rhs_task_ids.remove_range(task_id..);
+
+        // 3. before_name -> new_name in the task's KindWithContent
+        for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
+            let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
+            swap_index_uid_in_task(&mut task, (lhs, rhs));
+            self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
+        }
+
+        // 4. remove the task from indexuid = before_name
+        // 5. add the task to indexuid = after_name
+        self.update_index(wtxn, lhs, |lhs_tasks| {
+            *lhs_tasks -= &index_lhs_task_ids;
+            *lhs_tasks |= &index_rhs_task_ids;
+        })?;
+        self.update_index(wtxn, rhs, |rhs_tasks| {
+            *rhs_tasks -= &index_rhs_task_ids;
+            *rhs_tasks |= &index_lhs_task_ids;
+        })?;
+
+        // 6. Swap in the index mapper
+        self.index_mapper.swap(wtxn, lhs, rhs)?;
+
+        Ok(())
+    }
+
+    /// Process the index operation on the given index.
+    ///
+    /// ## Return
+    /// The list of processed tasks.
+    fn apply_index_operation<'txn, 'i>(
+        &self,
+        index_wtxn: &'txn mut RwTxn<'i, '_>,
+        index: &'i Index,
+        operation: IndexOperation,
+    ) -> Result<Vec<Task>> {
+        match operation {
+            IndexOperation::DocumentClear { mut tasks, .. } => {
+                let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?;
+
+                let mut first_clear_found = false;
+                for task in &mut tasks {
+                    task.status = Status::Succeeded;
+                    // The first document clear will effectively delete every document
+                    // in the database but the next ones will clear 0 documents.
+                    task.details = match &task.kind {
+                        KindWithContent::DocumentClear { ..
} => { + let count = if first_clear_found { 0 } else { count }; + first_clear_found = true; + Some(Details::ClearAll { deleted_documents: Some(count) }) + } + otherwise => otherwise.default_details(), + }; + } + + Ok(tasks) + } + IndexOperation::DocumentImport { + index_uid: _, + primary_key, + method, + documents_counts, + content_files, + mut tasks, + } => { + let mut primary_key_has_been_set = false; + let must_stop_processing = self.must_stop_processing.clone(); + let indexer_config = self.index_mapper.indexer_config(); + // TODO use the code from the IndexCreate operation + if let Some(primary_key) = primary_key { + if index.primary_key(index_wtxn)?.is_none() { + let mut builder = + milli::update::Settings::new(index_wtxn, index, indexer_config); + builder.set_primary_key(primary_key); + builder.execute( + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.clone().get(), + )?; + primary_key_has_been_set = true; + } + } + + let config = IndexDocumentsConfig { update_method: method, ..Default::default() }; + + let mut builder = milli::update::IndexDocuments::new( + index_wtxn, + index, + indexer_config, + config, + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.get(), + )?; + + let mut results = Vec::new(); + for content_uuid in content_files.into_iter() { + let content_file = self.file_store.get_update(content_uuid)?; + let reader = DocumentsBatchReader::from_reader(content_file) + .map_err(milli::Error::from)?; + let (new_builder, user_result) = builder.add_documents(reader)?; + builder = new_builder; + + let user_result = match user_result { + Ok(count) => Ok(DocumentAdditionResult { + indexed_documents: count, + number_of_documents: count, // TODO: this is wrong, we should use the value stored in the Details. + }), + Err(e) => Err(milli::Error::from(e)), + }; + + results.push(user_result); + } + + if results.iter().any(|res| res.is_ok()) { + let addition = builder.execute()?; + info!("document addition done: {:?}", addition); + } else if primary_key_has_been_set { + // Everything failed but we've set a primary key. + // We need to remove it. + let mut builder = + milli::update::Settings::new(index_wtxn, index, indexer_config); + builder.reset_primary_key(); + builder.execute( + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.clone().get(), + )?; + } + + for (task, (ret, count)) in + tasks.iter_mut().zip(results.into_iter().zip(documents_counts)) + { + match ret { + Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) => { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentAdditionOrUpdate { + received_documents: number_of_documents, + indexed_documents: Some(indexed_documents), + }); + } + Err(error) => { + task.status = Status::Failed; + task.details = Some(Details::DocumentAdditionOrUpdate { + received_documents: count, + indexed_documents: Some(count), + }); + task.error = Some(error.into()) + } + } + } + + Ok(tasks) + } + IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => { + let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?; + documents.iter().flatten().for_each(|id| { + builder.delete_external_id(id); + }); + + let DocumentDeletionResult { deleted_documents, .. 
} = builder.execute()?; + + for (task, documents) in tasks.iter_mut().zip(documents) { + task.status = Status::Succeeded; + task.details = Some(Details::DocumentDeletion { + provided_ids: documents.len(), + deleted_documents: Some(deleted_documents.min(documents.len() as u64)), + }); + } + + Ok(tasks) + } + IndexOperation::Settings { index_uid: _, settings, mut tasks } => { + let indexer_config = self.index_mapper.indexer_config(); + let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config); + + for (task, (_, settings)) in tasks.iter_mut().zip(settings) { + let checked_settings = settings.clone().check(); + task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) }); + apply_settings_to_builder(&checked_settings, &mut builder); + + // We can apply the status right now and if an update fail later + // the whole batch will be marked as failed. + task.status = Status::Succeeded; + } + + let must_stop_processing = self.must_stop_processing.clone(); + builder.execute( + |indexing_step| debug!("update: {:?}", indexing_step), + || must_stop_processing.get(), + )?; + + Ok(tasks) + } + IndexOperation::SettingsAndDocumentImport { + index_uid, + primary_key, + method, + documents_counts, + content_files, + document_import_tasks, + settings, + settings_tasks, + } => { + let settings_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::Settings { + index_uid: index_uid.clone(), + settings, + tasks: settings_tasks, + }, + )?; + + let mut import_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::DocumentImport { + index_uid, + primary_key, + method, + documents_counts, + content_files, + tasks: document_import_tasks, + }, + )?; + + let mut tasks = settings_tasks; + tasks.append(&mut import_tasks); + Ok(tasks) + } + IndexOperation::DocumentClearAndSetting { + index_uid, + cleared_tasks, + settings, + settings_tasks, + } => { + let mut import_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::DocumentClear { + index_uid: index_uid.clone(), + tasks: cleared_tasks, + }, + )?; + + let settings_tasks = self.apply_index_operation( + index_wtxn, + index, + IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, + )?; + + let mut tasks = settings_tasks; + tasks.append(&mut import_tasks); + Ok(tasks) + } + } + } + + /// Delete each given task from all the databases (if it is deleteable). + /// + /// Return the number of tasks that were actually deleted. + fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result { + // 1. Remove from this list the tasks that we are not allowed to delete + let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?; + let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone(); + + let all_task_ids = self.all_task_ids(wtxn)?; + let mut to_delete_tasks = all_task_ids & matched_tasks; + to_delete_tasks -= processing_tasks; + to_delete_tasks -= enqueued_tasks; + + // 2. 
We now have a list of tasks to delete, delete them + + let mut affected_indexes = HashSet::new(); + let mut affected_statuses = HashSet::new(); + let mut affected_kinds = HashSet::new(); + let mut affected_canceled_by = RoaringBitmap::new(); + + for task_id in to_delete_tasks.iter() { + let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + + affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned())); + affected_statuses.insert(task.status); + affected_kinds.insert(task.kind.as_kind()); + // Note: don't delete the persisted task data since + // we can only delete succeeded, failed, and canceled tasks. + // In each of those cases, the persisted data is supposed to + // have been deleted already. + utils::remove_task_datetime(wtxn, self.enqueued_at, task.enqueued_at, task.uid)?; + if let Some(started_at) = task.started_at { + utils::remove_task_datetime(wtxn, self.started_at, started_at, task.uid)?; + } + if let Some(finished_at) = task.finished_at { + utils::remove_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; + } + if let Some(canceled_by) = task.canceled_by { + affected_canceled_by.insert(canceled_by); + } + } + + for index in affected_indexes { + self.update_index(wtxn, &index, |bitmap| *bitmap -= &to_delete_tasks)?; + } + + for status in affected_statuses { + self.update_status(wtxn, status, |bitmap| *bitmap -= &to_delete_tasks)?; + } + + for kind in affected_kinds { + self.update_kind(wtxn, kind, |bitmap| *bitmap -= &to_delete_tasks)?; + } + + for task in to_delete_tasks.iter() { + self.all_tasks.delete(wtxn, &BEU32::new(task))?; + } + for canceled_by in affected_canceled_by { + let canceled_by = BEU32::new(canceled_by); + if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? { + tasks -= &to_delete_tasks; + if tasks.is_empty() { + self.canceled_by.delete(wtxn, &canceled_by)?; + } else { + self.canceled_by.put(wtxn, &canceled_by, &tasks)?; + } + } + } + + Ok(to_delete_tasks.len()) + } + + /// Cancel each given task from all the databases (if it is cancelable). + /// + /// Returns the content files that the transaction owner must delete if the commit is successful. + fn cancel_matched_tasks( + &self, + wtxn: &mut RwTxn, + cancel_task_id: TaskId, + matched_tasks: &RoaringBitmap, + previous_started_at: OffsetDateTime, + previous_processing_tasks: &RoaringBitmap, + ) -> Result> { + let now = OffsetDateTime::now_utc(); + + // 1. Remove from this list the tasks that we are not allowed to cancel + // Notice that only the _enqueued_ ones are cancelable and we should + // have already aborted the indexation of the _processing_ ones + let cancelable_tasks = self.get_status(wtxn, Status::Enqueued)?; + let tasks_to_cancel = cancelable_tasks & matched_tasks; + + // 2. We now have a list of tasks to cancel, cancel them + let mut content_files_to_delete = Vec::new(); + for mut task in self.get_existing_tasks(wtxn, tasks_to_cancel.iter())? 
{ + if let Some(uuid) = task.content_uuid() { + content_files_to_delete.push(uuid); + } + if previous_processing_tasks.contains(task.uid) { + task.started_at = Some(previous_started_at); + } + task.status = Status::Canceled; + task.canceled_by = Some(cancel_task_id); + task.finished_at = Some(now); + task.details = task.details.map(|d| d.to_failed()); + self.update_task(wtxn, &task)?; + } + self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?; + + Ok(content_files_to_delete) + } +} diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs new file mode 100644 index 000000000..cfbf7a25e --- /dev/null +++ b/index-scheduler/src/error.rs @@ -0,0 +1,130 @@ +use meilisearch_types::error::{Code, ErrorCode}; +use meilisearch_types::tasks::{Kind, Status}; +use meilisearch_types::{heed, milli}; +use thiserror::Error; + +use crate::TaskId; + +#[allow(clippy::large_enum_variant)] +#[derive(Error, Debug)] +pub enum Error { + #[error("Index `{0}` not found.")] + IndexNotFound(String), + #[error( + "Indexes {} not found.", + .0.iter().map(|s| format!("`{}`", s)).collect::>().join(", ") + )] + IndexesNotFound(Vec), + #[error("Index `{0}` already exists.")] + IndexAlreadyExists(String), + #[error( + "Indexes must be declared only once during a swap. `{0}` was specified several times." + )] + SwapDuplicateIndexFound(String), + #[error( + "Indexes must be declared only once during a swap. {} were specified several times.", + .0.iter().map(|s| format!("`{}`", s)).collect::>().join(", ") + )] + SwapDuplicateIndexesFound(Vec), + #[error("Corrupted dump.")] + CorruptedDump, + #[error( + "Task `{field}` `{date}` is invalid. It should follow the YYYY-MM-DD or RFC 3339 date-time format." + )] + InvalidTaskDate { field: String, date: String }, + #[error("Task uid `{task_uid}` is invalid. It should only contain numeric characters.")] + InvalidTaskUids { task_uid: String }, + #[error( + "Task status `{status}` is invalid. Available task statuses are {}.", + enum_iterator::all::() + .map(|s| format!("`{s}`")) + .collect::>() + .join(", ") + )] + InvalidTaskStatuses { status: String }, + #[error( + "Task type `{type_}` is invalid. Available task types are {}", + enum_iterator::all::() + .map(|s| format!("`{s}`")) + .collect::>() + .join(", ") + )] + InvalidTaskTypes { type_: String }, + #[error( + "Task canceledBy `{canceled_by}` is invalid. It should only contains numeric characters separated by `,` character." + )] + InvalidTaskCanceledBy { canceled_by: String }, + #[error( + "{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_)." + )] + InvalidIndexUid { index_uid: String }, + #[error("Task `{0}` not found.")] + TaskNotFound(TaskId), + #[error("Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")] + TaskDeletionWithEmptyQuery, + #[error("Query parameters to filter the tasks to cancel are missing. 
Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")] + TaskCancelationWithEmptyQuery, + + #[error(transparent)] + Dump(#[from] dump::Error), + #[error(transparent)] + Heed(#[from] heed::Error), + #[error(transparent)] + Milli(#[from] milli::Error), + #[error("An unexpected crash occurred when processing the task.")] + ProcessBatchPanicked, + #[error(transparent)] + FileStore(#[from] file_store::Error), + #[error(transparent)] + IoError(#[from] std::io::Error), + #[error(transparent)] + Persist(#[from] tempfile::PersistError), + + #[error(transparent)] + Anyhow(#[from] anyhow::Error), + + // Irrecoverable errors: + #[error(transparent)] + CreateBatch(Box), + #[error("Corrupted task queue.")] + CorruptedTaskQueue, + #[error(transparent)] + TaskDatabaseUpdate(Box), + #[error(transparent)] + HeedTransaction(heed::Error), +} + +impl ErrorCode for Error { + fn error_code(&self) -> Code { + match self { + Error::IndexNotFound(_) => Code::IndexNotFound, + Error::IndexesNotFound(_) => Code::IndexNotFound, + Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists, + Error::SwapDuplicateIndexesFound(_) => Code::DuplicateIndexFound, + Error::SwapDuplicateIndexFound(_) => Code::DuplicateIndexFound, + Error::InvalidTaskDate { .. } => Code::InvalidTaskDateFilter, + Error::InvalidTaskUids { .. } => Code::InvalidTaskUidsFilter, + Error::InvalidTaskStatuses { .. } => Code::InvalidTaskStatusesFilter, + Error::InvalidTaskTypes { .. } => Code::InvalidTaskTypesFilter, + Error::InvalidTaskCanceledBy { .. } => Code::InvalidTaskCanceledByFilter, + Error::InvalidIndexUid { .. } => Code::InvalidIndexUid, + Error::TaskNotFound(_) => Code::TaskNotFound, + Error::TaskDeletionWithEmptyQuery => Code::TaskDeletionWithEmptyQuery, + Error::TaskCancelationWithEmptyQuery => Code::TaskCancelationWithEmptyQuery, + Error::Dump(e) => e.error_code(), + Error::Milli(e) => e.error_code(), + Error::ProcessBatchPanicked => Code::Internal, + // TODO: TAMO: are all these errors really internal? + Error::Heed(_) => Code::Internal, + Error::FileStore(_) => Code::Internal, + Error::IoError(_) => Code::Internal, + Error::Persist(_) => Code::Internal, + Error::Anyhow(_) => Code::Internal, + Error::CorruptedTaskQueue => Code::Internal, + Error::CorruptedDump => Code::Internal, + Error::TaskDatabaseUpdate(_) => Code::Internal, + Error::CreateBatch(_) => Code::Internal, + Error::HeedTransaction(_) => Code::Internal, + } + } +} diff --git a/index-scheduler/src/index_mapper.rs b/index-scheduler/src/index_mapper.rs new file mode 100644 index 000000000..a647012fe --- /dev/null +++ b/index-scheduler/src/index_mapper.rs @@ -0,0 +1,233 @@ +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; +use std::{fs, thread}; + +use log::error; +use meilisearch_types::heed::types::{SerdeBincode, Str}; +use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn}; +use meilisearch_types::milli::update::IndexerConfig; +use meilisearch_types::milli::Index; +use uuid::Uuid; + +use self::IndexStatus::{Available, BeingDeleted}; +use crate::{Error, Result}; + +const INDEX_MAPPING: &str = "index-mapping"; + +/// Structure managing meilisearch's indexes. +/// +/// It is responsible for: +/// 1. Creating new indexes +/// 2. Opening indexes and storing references to these opened indexes +/// 3. Accessing indexes through their uuid +/// 4. 
Mapping a user-defined name to each index uuid. +#[derive(Clone)] +pub struct IndexMapper { + /// Keep track of the opened indexes. Used mainly by the index resolver. + index_map: Arc>>, + + // TODO create a UUID Codec that uses the 16 bytes representation + /// Map an index name with an index uuid currently available on disk. + pub(crate) index_mapping: Database>, + + /// Path to the folder where the LMDB environments of each index are. + base_path: PathBuf, + index_size: usize, + pub indexer_config: Arc, +} + +/// Whether the index is available for use or is forbidden to be inserted back in the index map +#[allow(clippy::large_enum_variant)] +#[derive(Clone)] +pub enum IndexStatus { + /// Do not insert it back in the index map as it is currently being deleted. + BeingDeleted, + /// You can use the index without worrying about anything. + Available(Index), +} + +impl IndexMapper { + pub fn new( + env: &Env, + base_path: PathBuf, + index_size: usize, + indexer_config: IndexerConfig, + ) -> Result { + Ok(Self { + index_map: Arc::default(), + index_mapping: env.create_database(Some(INDEX_MAPPING))?, + base_path, + index_size, + indexer_config: Arc::new(indexer_config), + }) + } + + /// Create or open an index in the specified path. + /// The path *must* exists or an error will be thrown. + fn create_or_open_index(&self, path: &Path) -> Result { + let mut options = EnvOpenOptions::new(); + options.map_size(self.index_size); + options.max_readers(1024); + Ok(Index::new(options, path)?) + } + + /// Get or create the index. + pub fn create_index(&self, mut wtxn: RwTxn, name: &str) -> Result { + match self.index(&wtxn, name) { + Ok(index) => { + wtxn.commit()?; + Ok(index) + } + Err(Error::IndexNotFound(_)) => { + let uuid = Uuid::new_v4(); + self.index_mapping.put(&mut wtxn, name, &uuid)?; + + let index_path = self.base_path.join(uuid.to_string()); + fs::create_dir_all(&index_path)?; + let index = self.create_or_open_index(&index_path)?; + + wtxn.commit()?; + // TODO: it would be better to lazily create the index. But we need an Index::open function for milli. + if let Some(BeingDeleted) = + self.index_map.write().unwrap().insert(uuid, Available(index.clone())) + { + panic!("Uuid v4 conflict."); + } + + Ok(index) + } + error => error, + } + } + + /// Removes the index from the mapping table and the in-memory index map + /// but keeps the associated tasks. + pub fn delete_index(&self, mut wtxn: RwTxn, name: &str) -> Result<()> { + let uuid = self + .index_mapping + .get(&wtxn, name)? + .ok_or_else(|| Error::IndexNotFound(name.to_string()))?; + + // Once we retrieved the UUID of the index we remove it from the mapping table. + assert!(self.index_mapping.delete(&mut wtxn, name)?); + + wtxn.commit()?; + // We remove the index from the in-memory index map. + let mut lock = self.index_map.write().unwrap(); + let closing_event = match lock.insert(uuid, BeingDeleted) { + Some(Available(index)) => Some(index.prepare_for_closing()), + _ => None, + }; + + drop(lock); + + let index_map = self.index_map.clone(); + let index_path = self.base_path.join(uuid.to_string()); + let index_name = name.to_string(); + thread::Builder::new() + .name(String::from("index_deleter")) + .spawn(move || { + // We first wait to be sure that the previously opened index is effectively closed. + // This can take a lot of time, this is why we do that in a seperate thread. + if let Some(closing_event) = closing_event { + closing_event.wait(); + } + + // Then we remove the content from disk. 
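+                // `index_path` is the uuid-named folder of this index under the mapper's
+                // `base_path`, e.g. `<base_path>/8c9583b1-.../` (the uuid shown here is
+                // only illustrative); it holds the LMDB files of the index.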
+ if let Err(e) = fs::remove_dir_all(&index_path) { + error!( + "An error happened when deleting the index {} ({}): {}", + index_name, uuid, e + ); + } + + // Finally we remove the entry from the index map. + assert!(matches!(index_map.write().unwrap().remove(&uuid), Some(BeingDeleted))); + }) + .unwrap(); + + Ok(()) + } + + pub fn exists(&self, rtxn: &RoTxn, name: &str) -> Result { + Ok(self.index_mapping.get(rtxn, name)?.is_some()) + } + + /// Return an index, may open it if it wasn't already opened. + pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result { + let uuid = self + .index_mapping + .get(rtxn, name)? + .ok_or_else(|| Error::IndexNotFound(name.to_string()))?; + + // we clone here to drop the lock before entering the match + let index = self.index_map.read().unwrap().get(&uuid).cloned(); + let index = match index { + Some(Available(index)) => index, + Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())), + // since we're lazy, it's possible that the index has not been opened yet. + None => { + let mut index_map = self.index_map.write().unwrap(); + // between the read lock and the write lock it's not impossible + // that someone already opened the index (eg if two search happens + // at the same time), thus before opening it we check a second time + // if it's not already there. + // Since there is a good chance it's not already there we can use + // the entry method. + match index_map.entry(uuid) { + Entry::Vacant(entry) => { + let index_path = self.base_path.join(uuid.to_string()); + let index = self.create_or_open_index(&index_path)?; + entry.insert(Available(index.clone())); + index + } + Entry::Occupied(entry) => match entry.get() { + Available(index) => index.clone(), + BeingDeleted => return Err(Error::IndexNotFound(name.to_string())), + }, + } + } + }; + + Ok(index) + } + + /// Return all indexes, may open them if they weren't already opened. + pub fn indexes(&self, rtxn: &RoTxn) -> Result> { + self.index_mapping + .iter(rtxn)? + .map(|ret| { + ret.map_err(Error::from).and_then(|(name, _)| { + self.index(rtxn, name).map(|index| (name.to_string(), index)) + }) + }) + .collect() + } + + /// Swap two index names. + pub fn swap(&self, wtxn: &mut RwTxn, lhs: &str, rhs: &str) -> Result<()> { + let lhs_uuid = self + .index_mapping + .get(wtxn, lhs)? + .ok_or_else(|| Error::IndexNotFound(lhs.to_string()))?; + let rhs_uuid = self + .index_mapping + .get(wtxn, rhs)? 
+ .ok_or_else(|| Error::IndexNotFound(rhs.to_string()))?; + + self.index_mapping.put(wtxn, lhs, &rhs_uuid)?; + self.index_mapping.put(wtxn, rhs, &lhs_uuid)?; + + Ok(()) + } + + pub fn index_exists(&self, rtxn: &RoTxn, name: &str) -> Result { + Ok(self.index_mapping.get(rtxn, name)?.is_some()) + } + + pub fn indexer_config(&self) -> &IndexerConfig { + &self.indexer_config + } +} diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs new file mode 100644 index 000000000..0f0c9953a --- /dev/null +++ b/index-scheduler/src/insta_snapshot.rs @@ -0,0 +1,256 @@ +use std::fmt::Write; + +use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; +use meilisearch_types::heed::{Database, RoTxn}; +use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Details, Task}; +use roaring::RoaringBitmap; + +use crate::index_mapper::IndexMapper; +use crate::{IndexScheduler, Kind, Status, BEI128}; + +pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { + scheduler.assert_internally_consistent(); + + let IndexScheduler { + autobatching_enabled, + must_stop_processing: _, + processing_tasks, + file_store, + env, + all_tasks, + status, + kind, + index_tasks, + canceled_by, + enqueued_at, + started_at, + finished_at, + index_mapper, + wake_up: _, + dumps_path: _, + snapshots_path: _, + auth_path: _, + version_file_path: _, + test_breakpoint_sdr: _, + planned_failures: _, + run_loop_iteration: _, + } = scheduler; + + let rtxn = env.read_txn().unwrap(); + + let mut snap = String::new(); + + let processing_tasks = processing_tasks.read().unwrap().processing.clone(); + snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n")); + snap.push_str("### Processing Tasks:\n"); + snap.push_str(&snapshot_bitmap(&processing_tasks)); + snap.push_str("\n----------------------------------------------------------------------\n"); + + snap.push_str("### All Tasks:\n"); + snap.push_str(&snapshot_all_tasks(&rtxn, *all_tasks)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Status:\n"); + snap.push_str(&snapshot_status(&rtxn, *status)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Kind:\n"); + snap.push_str(&snapshot_kind(&rtxn, *kind)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Index Tasks:\n"); + snap.push_str(&snapshot_index_tasks(&rtxn, *index_tasks)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Index Mapper:\n"); + snap.push_str(&snapshot_index_mapper(&rtxn, index_mapper)); + snap.push_str("\n----------------------------------------------------------------------\n"); + + snap.push_str("### Canceled By:\n"); + snap.push_str(&snapshot_canceled_by(&rtxn, *canceled_by)); + snap.push_str("\n----------------------------------------------------------------------\n"); + + snap.push_str("### Enqueued At:\n"); + snap.push_str(&snapshot_date_db(&rtxn, *enqueued_at)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Started At:\n"); + snap.push_str(&snapshot_date_db(&rtxn, *started_at)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### Finished At:\n"); + 
snap.push_str(&snapshot_date_db(&rtxn, *finished_at)); + snap.push_str("----------------------------------------------------------------------\n"); + + snap.push_str("### File Store:\n"); + snap.push_str(&snapshot_file_store(file_store)); + snap.push_str("\n----------------------------------------------------------------------\n"); + + snap +} + +pub fn snapshot_file_store(file_store: &file_store::FileStore) -> String { + let mut snap = String::new(); + for uuid in file_store.__all_uuids() { + snap.push_str(&format!("{uuid}\n")); + } + snap +} + +pub fn snapshot_bitmap(r: &RoaringBitmap) -> String { + let mut snap = String::new(); + snap.push('['); + for x in r { + snap.push_str(&format!("{x},")); + } + snap.push(']'); + snap +} + +pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database, SerdeJson>) -> String { + let mut snap = String::new(); + let iter = db.iter(rtxn).unwrap(); + for next in iter { + let (task_id, task) = next.unwrap(); + snap.push_str(&format!("{task_id} {}\n", snapshot_task(&task))); + } + snap +} + +pub fn snapshot_date_db( + rtxn: &RoTxn, + db: Database, CboRoaringBitmapCodec>, +) -> String { + let mut snap = String::new(); + let iter = db.iter(rtxn).unwrap(); + for next in iter { + let (_timestamp, task_ids) = next.unwrap(); + snap.push_str(&format!("[timestamp] {}\n", snapshot_bitmap(&task_ids))); + } + snap +} + +pub fn snapshot_task(task: &Task) -> String { + let mut snap = String::new(); + let Task { + uid, + enqueued_at: _, + started_at: _, + finished_at: _, + error, + canceled_by, + details, + status, + kind, + } = task; + snap.push('{'); + snap.push_str(&format!("uid: {uid}, ")); + snap.push_str(&format!("status: {status}, ")); + if let Some(canceled_by) = canceled_by { + snap.push_str(&format!("canceled_by: {canceled_by}, ")); + } + if let Some(error) = error { + snap.push_str(&format!("error: {error:?}, ")); + } + if let Some(details) = details { + snap.push_str(&format!("details: {}, ", &snapshot_details(details))); + } + snap.push_str(&format!("kind: {kind:?}")); + + snap.push('}'); + snap +} + +fn snapshot_details(d: &Details) -> String { + match d { + Details::DocumentAdditionOrUpdate { + received_documents, + indexed_documents, + } => { + format!("{{ received_documents: {received_documents}, indexed_documents: {indexed_documents:?} }}") + } + Details::SettingsUpdate { settings } => { + format!("{{ settings: {settings:?} }}") + } + Details::IndexInfo { primary_key } => { + format!("{{ primary_key: {primary_key:?} }}") + } + Details::DocumentDeletion { + provided_ids: received_document_ids, + deleted_documents, + } => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"), + Details::ClearAll { deleted_documents } => { + format!("{{ deleted_documents: {deleted_documents:?} }}") + }, + Details::TaskCancelation { + matched_tasks, + canceled_tasks, + original_filter, + } => { + format!("{{ matched_tasks: {matched_tasks:?}, canceled_tasks: {canceled_tasks:?}, original_filter: {original_filter:?} }}") + } + Details::TaskDeletion { + matched_tasks, + deleted_tasks, + original_filter, + } => { + format!("{{ matched_tasks: {matched_tasks:?}, deleted_tasks: {deleted_tasks:?}, original_filter: {original_filter:?} }}") + }, + Details::Dump { dump_uid } => { + format!("{{ dump_uid: {dump_uid:?} }}") + }, + Details::IndexSwap { swaps } => { + format!("{{ swaps: {swaps:?} }}") + } + } +} + +pub fn snapshot_status( + rtxn: &RoTxn, + db: Database, RoaringBitmapCodec>, +) -> String { + let mut snap = String::new(); + let 
iter = db.iter(rtxn).unwrap(); + for next in iter { + let (status, task_ids) = next.unwrap(); + writeln!(snap, "{status} {}", snapshot_bitmap(&task_ids)).unwrap(); + } + snap +} +pub fn snapshot_kind(rtxn: &RoTxn, db: Database, RoaringBitmapCodec>) -> String { + let mut snap = String::new(); + let iter = db.iter(rtxn).unwrap(); + for next in iter { + let (kind, task_ids) = next.unwrap(); + let kind = serde_json::to_string(&kind).unwrap(); + writeln!(snap, "{kind} {}", snapshot_bitmap(&task_ids)).unwrap(); + } + snap +} + +pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database) -> String { + let mut snap = String::new(); + let iter = db.iter(rtxn).unwrap(); + for next in iter { + let (index, task_ids) = next.unwrap(); + writeln!(snap, "{index} {}", snapshot_bitmap(&task_ids)).unwrap(); + } + snap +} +pub fn snapshot_canceled_by( + rtxn: &RoTxn, + db: Database, RoaringBitmapCodec>, +) -> String { + let mut snap = String::new(); + let iter = db.iter(rtxn).unwrap(); + for next in iter { + let (kind, task_ids) = next.unwrap(); + writeln!(snap, "{kind} {}", snapshot_bitmap(&task_ids)).unwrap(); + } + snap +} +pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String { + let names = mapper.indexes(rtxn).unwrap().into_iter().map(|(n, _)| n).collect::>(); + format!("{names:?}") +} diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs new file mode 100644 index 000000000..9787479e5 --- /dev/null +++ b/index-scheduler/src/lib.rs @@ -0,0 +1,3072 @@ +/*! +This crate defines the index scheduler, which is responsible for: +1. Keeping references to meilisearch's indexes and mapping them to their +user-defined names. +2. Scheduling tasks given by the user and executing them, in batch if possible. + +When an `IndexScheduler` is created, a new thread containing a reference to the +scheduler is created. This thread runs the scheduler's run loop, where the +scheduler waits to be woken up to process new tasks. It wakes up when: + +1. it is launched for the first time +2. a new task is registered +3. a batch of tasks has been processed + +It is only within this thread that the scheduler is allowed to process tasks. +On the other hand, the publicly accessible methods of the scheduler can be +called asynchronously from any thread. These methods can either query the +content of the scheduler or enqueue new tasks. 
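+
+For example, an embedder could create a scheduler and then query one of its
+indexes from another thread, roughly as sketched below (the paths and sizes are
+placeholder values chosen for the example, not recommendations):
+
+```no_run
+# use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
+# use meilisearch_types::milli::update::IndexerConfig;
+# fn example() -> index_scheduler::Result<()> {
+// Creating the scheduler also spawns its run loop in a dedicated thread.
+let scheduler = IndexScheduler::new(IndexSchedulerOptions {
+    version_file_path: "data.ms/VERSION".into(),
+    auth_path: "data.ms/auth".into(),
+    tasks_path: "data.ms/tasks".into(),
+    update_file_path: "data.ms/update_files".into(),
+    indexes_path: "data.ms/indexes".into(),
+    snapshots_path: "snapshots".into(),
+    dumps_path: "dumps".into(),
+    task_db_size: 10 * 1024 * 1024 * 1024, // placeholder sizes
+    index_size: 100 * 1024 * 1024 * 1024,
+    indexer_config: IndexerConfig::default(),
+    autobatching_enabled: true,
+})?;
+
+// The publicly accessible methods can then be called from any thread.
+let _movies_index = scheduler.index("movies")?;
+# Ok(())
+# }
+```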
+*/ + +mod autobatcher; +mod batch; +pub mod error; +mod index_mapper; +#[cfg(test)] +mod insta_snapshot; +mod utils; + +pub type Result = std::result::Result; +pub type TaskId = u32; + +use std::ops::{Bound, RangeBounds}; +use std::path::PathBuf; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering::Relaxed; +use std::sync::{Arc, RwLock}; +use std::time::Duration; + +use dump::{KindDump, TaskDump, UpdateFile}; +pub use error::Error; +use file_store::FileStore; +use meilisearch_types::error::ResponseError; +use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; +use meilisearch_types::heed::{self, Database, Env, RoTxn}; +use meilisearch_types::milli; +use meilisearch_types::milli::documents::DocumentsBatchBuilder; +use meilisearch_types::milli::update::IndexerConfig; +use meilisearch_types::milli::{CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use roaring::RoaringBitmap; +use synchronoise::SignalEvent; +use time::OffsetDateTime; +use utils::{filter_out_references_to_newer_tasks, keep_tasks_within_datetimes, map_bound}; +use uuid::Uuid; + +use crate::index_mapper::IndexMapper; +use crate::utils::check_index_swap_validity; + +pub(crate) type BEI128 = + meilisearch_types::heed::zerocopy::I128; + +/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. +/// +/// An empty/default query (where each field is set to `None`) matches all tasks. +/// Each non-null field restricts the set of tasks further. +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Query { + /// The maximum number of tasks to be matched + pub limit: Option, + /// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched + pub from: Option, + /// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls + pub statuses: Option>, + /// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks. + /// + /// The kind of a task is given by: + /// ``` + /// # use meilisearch_types::tasks::{Task, Kind}; + /// # fn doc_func(task: Task) -> Kind { + /// task.kind.as_kind() + /// # } + /// ``` + pub types: Option>, + /// The allowed [index ids](meilisearch_types::tasks::Task::index_uid) of the matched tasks + pub index_uids: Option>, + /// The [task ids](`meilisearch_types::tasks::Task::uid`) to be matched + pub uids: Option>, + /// The [task ids](`meilisearch_types::tasks::Task::uid`) of the [`TaskCancelation`](meilisearch_types::tasks::Task::Kind::TaskCancelation) tasks + /// that canceled the matched tasks. + pub canceled_by: Option>, + /// Exclusive upper bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. + pub before_enqueued_at: Option, + /// Exclusive lower bound of the matched tasks' [`enqueued_at`](meilisearch_types::tasks::Task::enqueued_at) field. + pub after_enqueued_at: Option, + /// Exclusive upper bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. + pub before_started_at: Option, + /// Exclusive lower bound of the matched tasks' [`started_at`](meilisearch_types::tasks::Task::started_at) field. + pub after_started_at: Option, + /// Exclusive upper bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. + pub before_finished_at: Option, + /// Exclusive lower bound of the matched tasks' [`finished_at`](meilisearch_types::tasks::Task::finished_at) field. 
+ pub after_finished_at: Option, +} + +impl Query { + /// Return `true` if every field of the query is set to `None`, such that the query + /// matches all tasks. + pub fn is_empty(&self) -> bool { + matches!( + self, + Query { + limit: None, + from: None, + statuses: None, + types: None, + index_uids: None, + uids: None, + canceled_by: None, + before_enqueued_at: None, + after_enqueued_at: None, + before_started_at: None, + after_started_at: None, + before_finished_at: None, + after_finished_at: None, + } + ) + } + + /// Add an [index id](meilisearch_types::tasks::Task::index_uid) to the list of permitted indexes. + pub fn with_index(self, index_uid: String) -> Self { + let mut index_vec = self.index_uids.unwrap_or_default(); + index_vec.push(index_uid); + Self { index_uids: Some(index_vec), ..self } + } +} + +#[derive(Debug, Clone)] +struct ProcessingTasks { + /// The date and time at which the indexation started. + started_at: OffsetDateTime, + /// The list of tasks ids that are currently running. + processing: RoaringBitmap, +} + +impl ProcessingTasks { + /// Creates an empty `ProcessingAt` struct. + fn new() -> ProcessingTasks { + ProcessingTasks { started_at: OffsetDateTime::now_utc(), processing: RoaringBitmap::new() } + } + + /// Stores the currently processing tasks, and the date time at which it started. + fn start_processing_at(&mut self, started_at: OffsetDateTime, processing: RoaringBitmap) { + self.started_at = started_at; + self.processing = processing; + } + + /// Set the processing tasks to an empty list + fn stop_processing(&mut self) { + self.processing = RoaringBitmap::new(); + } + + /// Returns `true` if there, at least, is one task that is currently processing that we must stop. + fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool { + !self.processing.is_disjoint(canceled_tasks) + } +} + +#[derive(Default, Clone, Debug)] +struct MustStopProcessing(Arc); + +impl MustStopProcessing { + fn get(&self) -> bool { + self.0.load(Relaxed) + } + + fn must_stop(&self) { + self.0.store(true, Relaxed); + } + + fn reset(&self) { + self.0.store(false, Relaxed); + } +} + +/// Database const names for the `IndexScheduler`. +mod db_name { + pub const ALL_TASKS: &str = "all-tasks"; + pub const STATUS: &str = "status"; + pub const KIND: &str = "kind"; + pub const INDEX_TASKS: &str = "index-tasks"; + pub const CANCELED_BY: &str = "canceled_by"; + pub const ENQUEUED_AT: &str = "enqueued-at"; + pub const STARTED_AT: &str = "started-at"; + pub const FINISHED_AT: &str = "finished-at"; +} + +#[cfg(test)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Breakpoint { + // this state is only encountered while creating the scheduler in the test suite. + Init, + + Start, + BatchCreated, + BeforeProcessing, + AfterProcessing, + AbortedIndexation, + ProcessBatchSucceeded, + ProcessBatchFailed, + InsideProcessBatch, +} + +#[derive(Debug)] +pub struct IndexSchedulerOptions { + /// The path to the version file of Meilisearch. + pub version_file_path: PathBuf, + /// The path to the folder containing the auth LMDB env. + pub auth_path: PathBuf, + /// The path to the folder containing the task databases. + pub tasks_path: PathBuf, + /// The path to the file store containing the files associated to the tasks. + pub update_file_path: PathBuf, + /// The path to the folder containing meilisearch's indexes. + pub indexes_path: PathBuf, + /// The path to the folder containing the snapshots. + pub snapshots_path: PathBuf, + /// The path to the folder containing the dumps. 
+ pub dumps_path: PathBuf, + /// The maximum size, in bytes, of each meilisearch index. + pub task_db_size: usize, + /// The maximum size, in bytes, of the tasks index. + pub index_size: usize, + /// Configuration used during indexing for each meilisearch index. + pub indexer_config: IndexerConfig, + /// Set to `true` iff the index scheduler is allowed to automatically + /// batch tasks together, to process multiple tasks at once. + pub autobatching_enabled: bool, +} + +/// Structure which holds meilisearch's indexes and schedules the tasks +/// to be performed on them. +pub struct IndexScheduler { + /// The LMDB environment which the DBs are associated with. + pub(crate) env: Env, + + /// A boolean that can be set to true to stop the currently processing tasks. + pub(crate) must_stop_processing: MustStopProcessing, + + /// The list of tasks currently processing + pub(crate) processing_tasks: Arc>, + + /// The list of files referenced by the tasks + pub(crate) file_store: FileStore, + + // The main database, it contains all the tasks accessible by their Id. + pub(crate) all_tasks: Database, SerdeJson>, + + /// All the tasks ids grouped by their status. + // TODO we should not be able to serialize a `Status::Processing` in this database. + pub(crate) status: Database, RoaringBitmapCodec>, + /// All the tasks ids grouped by their kind. + pub(crate) kind: Database, RoaringBitmapCodec>, + /// Store the tasks associated to an index. + pub(crate) index_tasks: Database, + + /// Store the tasks that were canceled by a task uid + pub(crate) canceled_by: Database, RoaringBitmapCodec>, + + /// Store the task ids of tasks which were enqueued at a specific date + pub(crate) enqueued_at: Database, CboRoaringBitmapCodec>, + + /// Store the task ids of finished tasks which started being processed at a specific date + pub(crate) started_at: Database, CboRoaringBitmapCodec>, + + /// Store the task ids of tasks which finished at a specific date + pub(crate) finished_at: Database, CboRoaringBitmapCodec>, + + /// In charge of creating, opening, storing and returning indexes. + pub(crate) index_mapper: IndexMapper, + + /// Get a signal when a batch needs to be processed. + pub(crate) wake_up: Arc, + + /// Whether auto-batching is enabled or not. + pub(crate) autobatching_enabled: bool, + + /// The path used to create the dumps. + pub(crate) dumps_path: PathBuf, + + /// The path used to create the snapshots. + pub(crate) snapshots_path: PathBuf, + + /// The path to the folder containing the auth LMDB env. + pub(crate) auth_path: PathBuf, + + /// The path to the version file of Meilisearch. + pub(crate) version_file_path: PathBuf, + + // ================= test + // The next entry is dedicated to the tests. + /// Provide a way to set a breakpoint in multiple part of the scheduler. + /// + /// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation. + #[cfg(test)] + test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>, + + /// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler. + /// + /// The first field is the iteration index and the second field identifies a location in the code. 
+ #[cfg(test)] + planned_failures: Vec<(usize, tests::FailureLocation)>, + + /// A counter that is incremented before every call to [`tick`](IndexScheduler::tick) + #[cfg(test)] + run_loop_iteration: Arc>, +} + +impl IndexScheduler { + fn private_clone(&self) -> IndexScheduler { + IndexScheduler { + env: self.env.clone(), + must_stop_processing: self.must_stop_processing.clone(), + processing_tasks: self.processing_tasks.clone(), + file_store: self.file_store.clone(), + all_tasks: self.all_tasks, + status: self.status, + kind: self.kind, + index_tasks: self.index_tasks, + canceled_by: self.canceled_by, + enqueued_at: self.enqueued_at, + started_at: self.started_at, + finished_at: self.finished_at, + index_mapper: self.index_mapper.clone(), + wake_up: self.wake_up.clone(), + autobatching_enabled: self.autobatching_enabled, + snapshots_path: self.snapshots_path.clone(), + dumps_path: self.dumps_path.clone(), + auth_path: self.auth_path.clone(), + version_file_path: self.version_file_path.clone(), + #[cfg(test)] + test_breakpoint_sdr: self.test_breakpoint_sdr.clone(), + #[cfg(test)] + planned_failures: self.planned_failures.clone(), + #[cfg(test)] + run_loop_iteration: self.run_loop_iteration.clone(), + } + } +} + +impl IndexScheduler { + /// Create an index scheduler and start its run loop. + pub fn new( + options: IndexSchedulerOptions, + #[cfg(test)] test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>, + #[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>, + ) -> Result { + std::fs::create_dir_all(&options.tasks_path)?; + std::fs::create_dir_all(&options.update_file_path)?; + std::fs::create_dir_all(&options.indexes_path)?; + std::fs::create_dir_all(&options.dumps_path)?; + + let env = heed::EnvOpenOptions::new() + .max_dbs(10) + .map_size(options.task_db_size) + .open(options.tasks_path)?; + let file_store = FileStore::new(&options.update_file_path)?; + + // allow unreachable_code to get rids of the warning in the case of a test build. + let this = Self { + must_stop_processing: MustStopProcessing::default(), + processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())), + file_store, + all_tasks: env.create_database(Some(db_name::ALL_TASKS))?, + status: env.create_database(Some(db_name::STATUS))?, + kind: env.create_database(Some(db_name::KIND))?, + index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?, + canceled_by: env.create_database(Some(db_name::CANCELED_BY))?, + enqueued_at: env.create_database(Some(db_name::ENQUEUED_AT))?, + started_at: env.create_database(Some(db_name::STARTED_AT))?, + finished_at: env.create_database(Some(db_name::FINISHED_AT))?, + index_mapper: IndexMapper::new( + &env, + options.indexes_path, + options.index_size, + options.indexer_config, + )?, + env, + // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things + wake_up: Arc::new(SignalEvent::auto(true)), + autobatching_enabled: options.autobatching_enabled, + dumps_path: options.dumps_path, + snapshots_path: options.snapshots_path, + auth_path: options.auth_path, + version_file_path: options.version_file_path, + + #[cfg(test)] + test_breakpoint_sdr, + #[cfg(test)] + planned_failures, + #[cfg(test)] + run_loop_iteration: Arc::new(RwLock::new(0)), + }; + + this.run(); + Ok(this) + } + + pub fn read_txn(&self) -> Result { + self.env.read_txn().map_err(|e| e.into()) + } + + /// Start the run loop for the given index scheduler. 
+ /// + /// This function will execute in a different thread and must be called + /// only once per index scheduler. + fn run(&self) { + let run = self.private_clone(); + std::thread::Builder::new() + .name(String::from("scheduler")) + .spawn(move || { + #[cfg(test)] + run.breakpoint(Breakpoint::Init); + + loop { + run.wake_up.wait(); + + match run.tick() { + Ok(0) => (), + Ok(_) => run.wake_up.signal(), + Err(e) => { + log::error!("{}", e); + // Wait one second when an irrecoverable error occurs. + if matches!( + e, + Error::CorruptedTaskQueue + | Error::TaskDatabaseUpdate(_) + | Error::HeedTransaction(_) + | Error::CreateBatch(_) + ) { + std::thread::sleep(Duration::from_secs(1)); + } + run.wake_up.signal(); + } + } + } + }) + .unwrap(); + } + + pub fn indexer_config(&self) -> &IndexerConfig { + &self.index_mapper.indexer_config + } + + /// Return the index corresponding to the name. + /// + /// * If the index wasn't opened before, the index will be opened. + /// * If the index doesn't exist on disk, the `IndexNotFoundError` is thrown. + pub fn index(&self, name: &str) -> Result { + let rtxn = self.env.read_txn()?; + self.index_mapper.index(&rtxn, name) + } + + /// Return and open all the indexes. + pub fn indexes(&self) -> Result> { + let rtxn = self.env.read_txn()?; + self.index_mapper.indexes(&rtxn) + } + + /// Return the task ids matched by the given query from the index scheduler's point of view. + pub(crate) fn get_task_ids(&self, rtxn: &RoTxn, query: &Query) -> Result { + let ProcessingTasks { + started_at: started_at_processing, processing: processing_tasks, .. + } = self.processing_tasks.read().unwrap().clone(); + + let mut tasks = self.all_task_ids(rtxn)?; + + if let Some(from) = &query.from { + tasks.remove_range(from.saturating_add(1)..); + } + + if let Some(status) = &query.statuses { + let mut status_tasks = RoaringBitmap::new(); + for status in status { + match status { + // special case for Processing tasks + Status::Processing => { + status_tasks |= &processing_tasks; + } + status => status_tasks |= &self.get_status(rtxn, *status)?, + }; + } + if !status.contains(&Status::Processing) { + tasks -= &processing_tasks; + } + tasks &= status_tasks; + } + + if let Some(uids) = &query.uids { + let uids = RoaringBitmap::from_iter(uids); + tasks &= &uids; + } + + if let Some(canceled_by) = &query.canceled_by { + for cancel_task_uid in canceled_by { + if let Some(canceled_by_uid) = + self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))? + { + tasks &= canceled_by_uid; + } + } + } + + if let Some(kind) = &query.types { + let mut kind_tasks = RoaringBitmap::new(); + for kind in kind { + kind_tasks |= self.get_kind(rtxn, *kind)?; + } + tasks &= &kind_tasks; + } + + if let Some(index) = &query.index_uids { + let mut index_tasks = RoaringBitmap::new(); + for index in index { + index_tasks |= self.index_tasks(rtxn, index)?; + } + tasks &= &index_tasks; + } + + // For the started_at filter, we need to treat the part of the tasks that are processing from the part of the + // tasks that are not processing. The non-processing ones are filtered normally while the processing ones + // are entirely removed unless the in-memory startedAt variable falls within the date filter. + // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. 
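+ // For example, if the current batch started processing at 12:00 and the query asks for
+ // `after_started_at = 13:00`, the whole in-memory `processing` bitmap is dropped from the
+ // results, while the finished tasks are still filtered through the persisted `started_at`
+ // database below.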
+ tasks = { + let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = + (&tasks - &processing_tasks, &tasks & &processing_tasks); + + // special case for Processing tasks + // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds + let mut clear_filtered_processing_tasks = + |start: Bound, end: Bound| { + let start = map_bound(start, |b| b.unix_timestamp_nanos()); + let end = map_bound(end, |b| b.unix_timestamp_nanos()); + let is_within_dates = RangeBounds::contains( + &(start, end), + &started_at_processing.unix_timestamp_nanos(), + ); + if !is_within_dates { + filtered_processing_tasks.clear(); + } + }; + match (query.after_started_at, query.before_started_at) { + (None, None) => (), + (None, Some(before)) => { + clear_filtered_processing_tasks(Bound::Unbounded, Bound::Excluded(before)) + } + (Some(after), None) => { + clear_filtered_processing_tasks(Bound::Excluded(after), Bound::Unbounded) + } + (Some(after), Some(before)) => { + clear_filtered_processing_tasks(Bound::Excluded(after), Bound::Excluded(before)) + } + }; + + keep_tasks_within_datetimes( + rtxn, + &mut filtered_non_processing_tasks, + self.started_at, + query.after_started_at, + query.before_started_at, + )?; + filtered_non_processing_tasks | filtered_processing_tasks + }; + + keep_tasks_within_datetimes( + rtxn, + &mut tasks, + self.enqueued_at, + query.after_enqueued_at, + query.before_enqueued_at, + )?; + + keep_tasks_within_datetimes( + rtxn, + &mut tasks, + self.finished_at, + query.after_finished_at, + query.before_finished_at, + )?; + + if let Some(limit) = query.limit { + tasks = tasks.into_iter().rev().take(limit as usize).collect(); + } + + Ok(tasks) + } + + /// Return true iff there is at least one task associated with this index + /// that is processing. + pub fn is_index_processing(&self, index: &str) -> Result { + let rtxn = self.env.read_txn()?; + let processing_tasks = self.processing_tasks.read().unwrap().processing.clone(); + let index_tasks = self.index_tasks(&rtxn, index)?; + let nbr_index_processing_tasks = processing_tasks.intersection_len(&index_tasks); + Ok(nbr_index_processing_tasks > 0) + } + + /// Return the task ids matching the query from the user's point of view. + /// + /// There are two differences between an internal query and a query executed by + /// the user. + /// + /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated + /// with many indexes internally. + /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. + pub fn get_task_ids_from_authorized_indexes( + &self, + rtxn: &RoTxn, + query: &Query, + authorized_indexes: &Option>, + ) -> Result { + let mut tasks = self.get_task_ids(rtxn, query)?; + + // If the query contains a list of index uid or there is a finite list of authorized indexes, + // then we must exclude all the kinds that aren't associated to one and only one index. + if query.index_uids.is_some() || authorized_indexes.is_some() { + for kind in enum_iterator::all::().filter(|kind| !kind.related_to_one_index()) { + tasks -= self.get_kind(rtxn, kind)?; + } + } + + // Any task that is internally associated with a non-authorized index + // must be discarded. 
+ if let Some(authorized_indexes) = authorized_indexes { + let all_indexes_iter = self.index_tasks.iter(rtxn)?; + for result in all_indexes_iter { + let (index, index_tasks) = result?; + if !authorized_indexes.contains(&index.to_owned()) { + tasks -= index_tasks; + } + } + } + + Ok(tasks) + } + + /// Return the tasks matching the query from the user's point of view. + /// + /// There are two differences between an internal query and a query executed by + /// the user. + /// + /// 1. IndexSwap tasks are not publicly associated with any index, but they are associated + /// with many indexes internally. + /// 2. The user may not have the rights to access the tasks (internally) associated with all indexes. + pub fn get_tasks_from_authorized_indexes( + &self, + query: Query, + authorized_indexes: Option>, + ) -> Result> { + let rtxn = self.env.read_txn()?; + + let tasks = + self.get_task_ids_from_authorized_indexes(&rtxn, &query, &authorized_indexes)?; + + let tasks = self.get_existing_tasks( + &rtxn, + tasks.into_iter().rev().take(query.limit.unwrap_or(u32::MAX) as usize), + )?; + + let ProcessingTasks { started_at, processing, .. } = + self.processing_tasks.read().map_err(|_| Error::CorruptedTaskQueue)?.clone(); + + let ret = tasks.into_iter(); + if processing.is_empty() { + Ok(ret.collect()) + } else { + Ok(ret + .map(|task| match processing.contains(task.uid) { + true => { + Task { status: Status::Processing, started_at: Some(started_at), ..task } + } + false => task, + }) + .collect()) + } + } + + /// Register a new task in the scheduler. + /// + /// If it fails and data was associated with the task, it tries to delete the associated data. + pub fn register(&self, kind: KindWithContent) -> Result { + let mut wtxn = self.env.write_txn()?; + + let mut task = Task { + uid: self.next_task_id(&wtxn)?, + enqueued_at: time::OffsetDateTime::now_utc(), + started_at: None, + finished_at: None, + error: None, + canceled_by: None, + details: kind.default_details(), + status: Status::Enqueued, + kind: kind.clone(), + }; + // For deletion and cancelation tasks, we want to make extra sure that they + // don't attempt to delete/cancel tasks that are newer than themselves. + filter_out_references_to_newer_tasks(&mut task); + // If the register task is an index swap task, verify that it is well-formed + // (that it does not contain duplicate indexes). + check_index_swap_validity(&task)?; + + // Get rid of the mutability. + let task = task; + + self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?; + + for index in task.indexes() { + self.update_index(&mut wtxn, index, |bitmap| { + bitmap.insert(task.uid); + })?; + } + + self.update_status(&mut wtxn, Status::Enqueued, |bitmap| { + bitmap.insert(task.uid); + })?; + + self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| { + bitmap.insert(task.uid); + })?; + + utils::insert_task_datetime(&mut wtxn, self.enqueued_at, task.enqueued_at, task.uid)?; + + if let Err(e) = wtxn.commit() { + self.delete_persisted_task_data(&task)?; + return Err(e.into()); + } + + // If the registered task is a task cancelation + // we inform the processing tasks to stop (if necessary). + if let KindWithContent::TaskCancelation { tasks, .. 
} = kind { + let tasks_to_cancel = RoaringBitmap::from_iter(tasks); + if self.processing_tasks.read().unwrap().must_cancel_processing_tasks(&tasks_to_cancel) + { + self.must_stop_processing.must_stop(); + } + } + + // notify the scheduler loop to execute a new tick + self.wake_up.signal(); + + Ok(task) + } + + /// Register a new task comming from a dump in the scheduler. + /// By takinig a mutable ref we're pretty sure no one will ever import a dump while actix is running. + pub fn register_dumped_task( + &mut self, + task: TaskDump, + content_file: Option>, + ) -> Result { + // Currently we don't need to access the tasks queue while loading a dump thus I can block everything. + let mut wtxn = self.env.write_txn()?; + + let content_uuid = match content_file { + Some(content_file) if task.status == Status::Enqueued => { + let (uuid, mut file) = self.create_update_file()?; + let mut builder = DocumentsBatchBuilder::new(file.as_file_mut()); + for doc in content_file { + builder.append_json_object(&doc?)?; + } + builder.into_inner()?; + file.persist()?; + + Some(uuid) + } + // If the task isn't `Enqueued` then just generate a recognisable `Uuid` + // in case we try to open it later. + _ if task.status != Status::Enqueued => Some(Uuid::nil()), + _ => None, + }; + + let task = Task { + uid: task.uid, + enqueued_at: task.enqueued_at, + started_at: task.started_at, + finished_at: task.finished_at, + error: task.error, + canceled_by: task.canceled_by, + details: task.details, + status: task.status, + kind: match task.kind { + KindDump::DocumentImport { + primary_key, + method, + documents_count, + allow_index_creation, + } => KindWithContent::DocumentAdditionOrUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + method, + content_file: content_uuid.ok_or(Error::CorruptedDump)?, + documents_count, + allow_index_creation, + }, + KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { + documents_ids, + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::DocumentClear => KindWithContent::DocumentClear { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::Settings { settings, is_deletion, allow_index_creation } => { + KindWithContent::SettingsUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + new_settings: settings, + is_deletion, + allow_index_creation, + } + } + KindDump::IndexDeletion => KindWithContent::IndexDeletion { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + }, + KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + }, + KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps }, + KindDump::TaskCancelation { query, tasks } => { + KindWithContent::TaskCancelation { query, tasks } + } + KindDump::TasksDeletion { query, tasks } => { + KindWithContent::TaskDeletion { query, tasks } + } + KindDump::DumpCreation { keys, instance_uid } => { + KindWithContent::DumpCreation { keys, instance_uid } + } + KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, + }, + }; + + self.all_tasks.put(&mut wtxn, &BEU32::new(task.uid), &task)?; + + for index in task.indexes() { + self.update_index(&mut wtxn, index, |bitmap| { + bitmap.insert(task.uid); + })?; + } + + self.update_status(&mut wtxn, task.status, 
|bitmap| { + bitmap.insert(task.uid); + })?; + + self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| { + (bitmap.insert(task.uid)); + })?; + + wtxn.commit()?; + self.wake_up.signal(); + + Ok(task) + } + + /// Create a new index without any associated task. + pub fn create_raw_index(&self, name: &str) -> Result { + let wtxn = self.env.write_txn()?; + let index = self.index_mapper.create_index(wtxn, name)?; + + Ok(index) + } + + /// Create a file and register it in the index scheduler. + /// + /// The returned file and uuid can be used to associate + /// some data to a task. The file will be kept until + /// the task has been fully processed. + pub fn create_update_file(&self) -> Result<(Uuid, file_store::File)> { + Ok(self.file_store.new_update()?) + } + + #[cfg(test)] + pub fn create_update_file_with_uuid(&self, uuid: u128) -> Result<(Uuid, file_store::File)> { + Ok(self.file_store.new_update_with_uuid(uuid)?) + } + + /// Delete a file from the index scheduler. + /// + /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method. + pub fn delete_update_file(&self, uuid: Uuid) -> Result<()> { + Ok(self.file_store.delete(uuid)?) + } + + /// Perform one iteration of the run loop. + /// + /// 1. Find the next batch of tasks to be processed. + /// 2. Update the information of these tasks following the start of their processing. + /// 3. Update the in-memory list of processed tasks accordingly. + /// 4. Process the batch: + /// - perform the actions of each batched task + /// - update the information of each batched task following the end + /// of their processing. + /// 5. Reset the in-memory list of processed tasks. + /// + /// Returns the number of processed tasks. + fn tick(&self) -> Result { + #[cfg(test)] + { + *self.run_loop_iteration.write().unwrap() += 1; + self.breakpoint(Breakpoint::Start); + } + + let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?; + let batch = + match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? { + Some(batch) => batch, + None => return Ok(0), + }; + drop(rtxn); + + // 1. store the starting date with the bitmap of processing tasks. + let mut ids = batch.ids(); + ids.sort_unstable(); + let processed_tasks = ids.len(); + let processing_tasks = RoaringBitmap::from_sorted_iter(ids.iter().copied()).unwrap(); + let started_at = OffsetDateTime::now_utc(); + + // We reset the must_stop flag to be sure that we don't stop processing tasks + self.must_stop_processing.reset(); + self.processing_tasks.write().unwrap().start_processing_at(started_at, processing_tasks); + + #[cfg(test)] + self.breakpoint(Breakpoint::BatchCreated); + + // 2. 
Process the tasks + let res = { + let cloned_index_scheduler = self.private_clone(); + let handle = std::thread::Builder::new() + .name(String::from("batch-operation")) + .spawn(move || cloned_index_scheduler.process_batch(batch)) + .unwrap(); + handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) + }; + + #[cfg(test)] + self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?; + + let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; + + let finished_at = OffsetDateTime::now_utc(); + match res { + Ok(tasks) => { + #[cfg(test)] + self.breakpoint(Breakpoint::ProcessBatchSucceeded); + + #[allow(unused_variables)] + for (i, mut task) in tasks.into_iter().enumerate() { + task.started_at = Some(started_at); + task.finished_at = Some(finished_at); + + #[cfg(test)] + self.maybe_fail( + tests::FailureLocation::UpdatingTaskAfterProcessBatchSuccess { + task_uid: i as u32, + }, + )?; + + self.update_task(&mut wtxn, &task) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; + if let Err(e) = self.delete_persisted_task_data(&task) { + log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); + } + } + log::info!("A batch of tasks was successfully completed."); + } + // If we have an abortion error we must stop the tick here and re-schedule tasks. + Err(Error::Milli(milli::Error::InternalError( + milli::InternalError::AbortedIndexation, + ))) => { + #[cfg(test)] + self.breakpoint(Breakpoint::AbortedIndexation); + wtxn.abort().map_err(Error::HeedTransaction)?; + + // We make sure that we don't call `stop_processing` on the `processing_tasks`, + // this is because we want to let the next tick call `create_next_batch` and keep + // the `started_at` date times and `processings` of the current processing tasks. + // This date time is used by the task cancelation to store the right `started_at` + // date in the task on disk. + return Ok(0); + } + // In case of a failure we must get back and patch all the tasks with the error. + Err(err) => { + #[cfg(test)] + self.breakpoint(Breakpoint::ProcessBatchFailed); + let error: ResponseError = err.into(); + for id in ids { + let mut task = self + .get_task(&wtxn, id) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? + .ok_or(Error::CorruptedTaskQueue)?; + task.started_at = Some(started_at); + task.finished_at = Some(finished_at); + task.status = Status::Failed; + task.error = Some(error.clone()); + task.details = task.details.map(|d| d.to_failed()); + + #[cfg(test)] + self.maybe_fail(tests::FailureLocation::UpdatingTaskAfterProcessBatchFailure)?; + + if let Err(e) = self.delete_persisted_task_data(&task) { + log::error!("Failure to delete the content files associated with task {}. Error: {e}", task.uid); + } + self.update_task(&mut wtxn, &task) + .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?; + } + } + } + + self.processing_tasks.write().unwrap().stop_processing(); + + #[cfg(test)] + self.maybe_fail(tests::FailureLocation::CommittingWtxn)?; + + wtxn.commit().map_err(Error::HeedTransaction)?; + + #[cfg(test)] + self.breakpoint(Breakpoint::AfterProcessing); + + Ok(processed_tasks) + } + + pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> { + match task.content_uuid() { + Some(content_file) => self.delete_update_file(content_file), + None => Ok(()), + } + } + + /// Blocks the thread until the test handle asks to progress to/through this breakpoint. + /// + /// Two messages are sent through the channel for each breakpoint. 
+ /// The first message is `(b, false)` and the second message is `(b, true)`. + /// + /// Since the channel has a capacity of zero, the `send` and `recv` calls wait for each other. + /// So when the index scheduler calls `test_breakpoint_sdr.send(b, false)`, it blocks + /// the thread until the test catches up by calling `test_breakpoint_rcv.recv()` enough. + /// From the test side, we call `recv()` repeatedly until we find the message `(breakpoint, false)`. + /// As soon as we find it, the index scheduler is unblocked but then wait again on the call to + /// `test_breakpoint_sdr.send(b, true)`. This message will only be able to send once the + /// test asks to progress to the next `(b2, false)`. + #[cfg(test)] + fn breakpoint(&self, b: Breakpoint) { + // We send two messages. The first one will sync with the call + // to `handle.wait_until(b)`. The second one will block until the + // the next call to `handle.wait_until(..)`. + self.test_breakpoint_sdr.send((b, false)).unwrap(); + // This one will only be able to be sent if the test handle stays alive. + // If it fails, then it means that we have exited the test. + // By crashing with `unwrap`, we kill the run loop. + self.test_breakpoint_sdr.send((b, true)).unwrap(); + } +} + +#[cfg(test)] +mod tests { + use std::time::Instant; + + use big_s::S; + use crossbeam::channel::RecvTimeoutError; + use file_store::File; + use meili_snap::snapshot; + use meilisearch_types::milli::obkv_to_json; + use meilisearch_types::milli::update::IndexDocumentsMethod::{ + ReplaceDocuments, UpdateDocuments, + }; + use meilisearch_types::tasks::IndexSwap; + use meilisearch_types::VERSION_FILE_NAME; + use tempfile::TempDir; + use time::Duration; + use uuid::Uuid; + use Breakpoint::*; + + use super::*; + use crate::insta_snapshot::{snapshot_bitmap, snapshot_index_scheduler}; + + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub enum FailureLocation { + InsideCreateBatch, + InsideProcessBatch, + PanicInsideProcessBatch, + AcquiringWtxn, + UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 }, + UpdatingTaskAfterProcessBatchFailure, + CommittingWtxn, + } + + impl IndexScheduler { + pub fn test( + autobatching_enabled: bool, + planned_failures: Vec<(usize, FailureLocation)>, + ) -> (Self, IndexSchedulerHandle) { + let tempdir = TempDir::new().unwrap(); + let (sender, receiver) = crossbeam::channel::bounded(0); + + let options = IndexSchedulerOptions { + version_file_path: tempdir.path().join(VERSION_FILE_NAME), + auth_path: tempdir.path().join("auth"), + tasks_path: tempdir.path().join("db_path"), + update_file_path: tempdir.path().join("file_store"), + indexes_path: tempdir.path().join("indexes"), + snapshots_path: tempdir.path().join("snapshots"), + dumps_path: tempdir.path().join("dumps"), + task_db_size: 1024 * 1024, // 1 MiB + index_size: 1024 * 1024, // 1 MiB + indexer_config: IndexerConfig::default(), + autobatching_enabled, + }; + + let index_scheduler = Self::new(options, sender, planned_failures).unwrap(); + + // To be 100% consistent between all test we're going to start the scheduler right now + // and ensure it's in the expected starting state. 
+ let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(1)) { + Ok(b) => b, + Err(RecvTimeoutError::Timeout) => { + panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") + } + Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."), + }; + assert_eq!(breakpoint, (Init, false)); + let index_scheduler_handle = IndexSchedulerHandle { + _tempdir: tempdir, + test_breakpoint_rcv: receiver, + last_breakpoint: breakpoint.0, + }; + + (index_scheduler, index_scheduler_handle) + } + + /// Return a [`CorruptedTaskQueue`](Error::CorruptedTaskQueue) error if a failure is planned + /// for the given location and current run loop iteration. + pub fn maybe_fail(&self, location: FailureLocation) -> Result<()> { + if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location)) + { + match location { + FailureLocation::PanicInsideProcessBatch => { + panic!("simulated panic") + } + _ => Err(Error::CorruptedTaskQueue), + } + } else { + Ok(()) + } + } + } + + /// Return a `KindWithContent::IndexCreation` task + fn index_creation_task(index: &'static str, primary_key: &'static str) -> KindWithContent { + KindWithContent::IndexCreation { index_uid: S(index), primary_key: Some(S(primary_key)) } + } + /// Create a `KindWithContent::DocumentImport` task that imports documents. + /// + /// - `index_uid` is given as parameter + /// - `primary_key` is given as parameter + /// - `method` is set to `ReplaceDocuments` + /// - `content_file` is given as parameter + /// - `documents_count` is given as parameter + /// - `allow_index_creation` is set to `true` + fn replace_document_import_task( + index: &'static str, + primary_key: Option<&'static str>, + content_file_uuid: u128, + documents_count: u64, + ) -> KindWithContent { + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S(index), + primary_key: primary_key.map(ToOwned::to_owned), + method: ReplaceDocuments, + content_file: Uuid::from_u128(content_file_uuid), + documents_count, + allow_index_creation: true, + } + } + + /// Create an update file with the given file uuid. + /// + /// The update file contains just one simple document whose id is given by `document_id`. + /// + /// The uuid of the file and its documents count is returned. + fn sample_documents( + index_scheduler: &IndexScheduler, + file_uuid: u128, + document_id: usize, + ) -> (File, u64) { + let content = format!( + r#" + {{ + "id" : "{document_id}" + }}"# + ); + + let (_uuid, mut file) = index_scheduler.create_update_file_with_uuid(file_uuid).unwrap(); + let documents_count = + meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + (file, documents_count) + } + + pub struct IndexSchedulerHandle { + _tempdir: TempDir, + test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>, + last_breakpoint: Breakpoint, + } + + impl IndexSchedulerHandle { + /// Advance the scheduler to the next tick. + /// Panic + /// * If the scheduler is waiting for a task to be registered. + /// * If the breakpoint queue is in a bad state. 
+ #[track_caller] + fn advance(&mut self) -> Breakpoint { + let (breakpoint_1, b) = match self + .test_breakpoint_rcv + .recv_timeout(std::time::Duration::from_secs(5)) + { + Ok(b) => b, + Err(RecvTimeoutError::Timeout) => { + panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") + } + Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."), + }; + // if we've already encountered a breakpoint we're supposed to be stuck on the false + // and we expect the same variant with the true to come now. + assert_eq!( + (breakpoint_1, b), + (self.last_breakpoint, true), + "Internal error in the test suite. In the previous iteration I got `({:?}, false)` and now I got `({:?}, {:?})`.", + self.last_breakpoint, + breakpoint_1, + b, + ); + + let (breakpoint_2, b) = match self + .test_breakpoint_rcv + .recv_timeout(std::time::Duration::from_secs(5)) + { + Ok(b) => b, + Err(RecvTimeoutError::Timeout) => { + panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") + } + Err(RecvTimeoutError::Disconnected) => panic!("The scheduler crashed."), + }; + assert!(!b, "Found the breakpoint handle in a bad state. Check your test suite"); + + self.last_breakpoint = breakpoint_2; + + breakpoint_2 + } + + /// Advance the scheduler until all the provided breakpoints are reached in order. + #[track_caller] + fn advance_till(&mut self, breakpoints: impl IntoIterator) { + for breakpoint in breakpoints { + let b = self.advance(); + assert_eq!( + b, breakpoint, + "Was expecting the breakpoint `{:?}` but instead got `{:?}`.", + breakpoint, b + ); + } + } + + /// Wait for `n` successful batches. + #[track_caller] + fn advance_n_successful_batches(&mut self, n: usize) { + for _ in 0..n { + self.advance_one_successful_batch(); + } + } + + /// Wait for `n` failed batches. + #[track_caller] + fn advance_n_failed_batches(&mut self, n: usize) { + for _ in 0..n { + self.advance_one_failed_batch(); + } + } + + // Wait for one successful batch. + #[track_caller] + fn advance_one_successful_batch(&mut self) { + self.advance_till([Start, BatchCreated]); + loop { + match self.advance() { + // the process_batch function can call itself recursively, thus we need to + // accept as may InsideProcessBatch as possible before moving to the next state. + InsideProcessBatch => (), + // the batch went successfully, we can stop the loop and go on with the next states. + ProcessBatchSucceeded => break, + AbortedIndexation => panic!("The batch was aborted."), + ProcessBatchFailed => panic!("The batch failed."), + breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), + } + } + + self.advance_till([AfterProcessing]); + } + + // Wait for one failed batch. + #[track_caller] + fn advance_one_failed_batch(&mut self) { + self.advance_till([Start, BatchCreated]); + loop { + match self.advance() { + // the process_batch function can call itself recursively, thus we need to + // accept as may InsideProcessBatch as possible before moving to the next state. + InsideProcessBatch => (), + // the batch went failed, we can stop the loop and go on with the next states. + ProcessBatchFailed => break, + ProcessBatchSucceeded => panic!("The batch succeeded. 
(and it wasn't supposed to sorry)"), + AbortedIndexation => panic!("The batch was aborted."), + breakpoint => panic!("Encountered an impossible breakpoint `{:?}`, this is probably an issue with the test suite.", breakpoint), + } + } + self.advance_till([AfterProcessing]); + } + } + + #[test] + fn register() { + // In this test, the handle doesn't make any progress, we only check that the tasks are registered + let (index_scheduler, mut _handle) = IndexScheduler::test(true, vec![]); + + let kinds = [ + index_creation_task("catto", "mouse"), + replace_document_import_task("catto", None, 0, 12), + replace_document_import_task("catto", None, 1, 50), + replace_document_import_task("doggo", Some("bone"), 2, 5000), + ]; + let (_, file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + file.persist().unwrap(); + let (_, file) = index_scheduler.create_update_file_with_uuid(1).unwrap(); + file.persist().unwrap(); + let (_, file) = index_scheduler.create_update_file_with_uuid(2).unwrap(); + file.persist().unwrap(); + + for (idx, kind) in kinds.into_iter().enumerate() { + let k = kind.as_kind(); + let task = index_scheduler.register(kind).unwrap(); + index_scheduler.assert_internally_consistent(); + + assert_eq!(task.uid, idx as u32); + assert_eq!(task.status, Status::Enqueued); + assert_eq!(task.kind.as_kind(), k); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_succesfully_registered"); + } + + #[test] + fn insert_task_while_another_task_is_processing() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + index_scheduler.register(index_creation_task("index_a", "id")).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start, BatchCreated]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_creation"); + + // while the task is processing can we register another task? + index_scheduler.register(index_creation_task("index_b", "id")).unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("index_a") }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + } + + /// We send a lot of tasks but notify the tasks scheduler only once as + /// we send them very fast, we must make sure that they are all processed. 
+ #[test] + fn process_tasks_inserted_without_new_signal() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("cattos"), primary_key: None }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_second_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_third_task"); + } + + #[test] + fn process_tasks_without_autobatching() { + let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]); + + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: S("doggos") }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_fourth_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "fourth"); + } + + #[test] + fn task_deletion_undeleteable() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + index_creation_task("catto", "mouse"), + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + // here we have registered all the tasks, but the index scheduler + // has not progressed at all + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + index_scheduler + .register(KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: 
RoaringBitmap::from_iter([0, 1]), + }) + .unwrap(); + // again, no progress made at all, but one more task is registered + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_enqueued"); + + // now we create the first batch + handle.advance_till([Start, BatchCreated]); + + // the task deletion should now be "processing" + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processing"); + + handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + // after the task deletion is processed, no task should actually have been deleted, + // because the tasks with ids 0 and 1 were still "enqueued", and thus undeleteable + // the "task deletion" task should be marked as "succeeded" and, in its details, the + // number of deleted tasks should be 0 + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_done"); + } + + #[test] + fn task_deletion_deleteable() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + handle.advance_one_successful_batch(); + // first addition of documents should be successful + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); + + // Now we delete the first task + index_scheduler + .register(KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_task_deletion"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); + } + + #[test] + fn task_deletion_delete_same_task_twice() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file0.persist().unwrap(); + file1.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("doggo", Some("bone"), 1, documents_count1), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + + handle.advance_one_successful_batch(); + // first addition of documents should be successful + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_processed"); + + // Now we delete the first task multiple times in a row + for _ in 0..2 { + index_scheduler + .register(KindWithContent::TaskDeletion { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + for _ in 0..2 { + handle.advance_one_successful_batch(); + 
index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_deletion_processed"); + } + + #[test] + fn document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = + meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register"); + + handle.advance_till([Start, BatchCreated]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_batch_creation"); + + handle.advance_till([InsideProcessBatch, ProcessBatchSucceeded, AfterProcessing]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "once_everything_is_processed"); + } + + #[test] + fn document_addition_and_index_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + index_scheduler + .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = + meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task"); + + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task"); + + handle.advance_one_successful_batch(); // The index creation. + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "before_index_creation"); + handle.advance_one_successful_batch(); // // after the execution of the two tasks in a single batch. 
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded"); + } + + #[test] + fn do_not_batch_task_of_different_indexes() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + let index_names = ["doggos", "cattos", "girafos"]; + + for name in index_names { + index_scheduler + .register(KindWithContent::IndexCreation { + index_uid: name.to_string(), + primary_key: None, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + for name in index_names { + index_scheduler + .register(KindWithContent::DocumentClear { index_uid: name.to_string() }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + + for _ in 0..(index_names.len() * 2) { + handle.advance_one_successful_batch(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + } + + #[test] + fn swap_indexes() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let to_enqueue = [ + index_creation_task("a", "id"), + index_creation_task("b", "id"), + index_creation_task("c", "id"), + index_creation_task("d", "id"), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_a"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_b"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_c"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "create_d"); + + index_scheduler + .register(KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "d".to_owned()) }, + ], + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_registered"); + index_scheduler + .register(KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("a".to_owned(), "c".to_owned()) }], + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "two_swaps_registered"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_processed"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "second_swap_processed"); + + index_scheduler.register(KindWithContent::IndexSwap { swaps: vec![] }).unwrap(); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "third_empty_swap_processed"); + } + + #[test] + fn swap_indexes_errors() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let to_enqueue = [ + index_creation_task("a", "id"), + index_creation_task("b", "id"), + index_creation_task("c", "id"), + index_creation_task("d", "id"), + ]; + + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.advance_n_successful_batches(4); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_the_index_creation"); + + let first_snap = snapshot_index_scheduler(&index_scheduler); + snapshot!(first_snap, name: "initial_tasks_processed"); + + let err = index_scheduler + .register(KindWithContent::IndexSwap { + 
swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("b".to_owned(), "a".to_owned()) }, + ], + }) + .unwrap_err(); + snapshot!(format!("{err}"), @"Indexes must be declared only once during a swap. `a`, `b` were specified several times."); + + let second_snap = snapshot_index_scheduler(&index_scheduler); + assert_eq!(first_snap, second_snap); + + // Index `e` does not exist, but we don't check its existence yet + index_scheduler + .register(KindWithContent::IndexSwap { + swaps: vec![ + IndexSwap { indexes: ("a".to_owned(), "b".to_owned()) }, + IndexSwap { indexes: ("c".to_owned(), "e".to_owned()) }, + IndexSwap { indexes: ("d".to_owned(), "f".to_owned()) }, + ], + }) + .unwrap(); + handle.advance_one_failed_batch(); + // Now the first swap should have an error message saying `e` and `f` do not exist + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_swap_failed"); + } + + #[test] + fn document_addition_and_index_deletion_on_unexisting_index() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let content = r#" + { + "id": 1, + "doggo": "bob" + }"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = + meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut()) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler + .register(KindWithContent::IndexDeletion { index_uid: S("doggos") }) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + handle.advance_n_successful_batches(1); + + snapshot!(snapshot_index_scheduler(&index_scheduler)); + } + + #[test] + fn cancel_enqueued_task() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }, + ]; + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_tasks_enqueued"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); + } + + #[test] + fn cancel_succeeded_task() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let _ = index_scheduler + .register(replace_document_import_task("catto", None, 0, documents_count0)) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processed"); + + index_scheduler + .register(KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }) + .unwrap(); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); + } 
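+ // Illustrative note: the next test exercises the cancelation handshake implemented in
+ // `register` and `tick`. Registering a `TaskCancelation` whose `tasks` bitmap intersects the
+ // currently processing set flips the shared `MustStopProcessing` flag, the indexation is then
+ // aborted with `InternalError::AbortedIndexation`, and the following tick schedules the
+ // cancelation task itself. A minimal sketch of that check, assuming task `0` is currently
+ // processing:
+ //
+ //     let to_cancel = RoaringBitmap::from_iter([0]);
+ //     let currently_processing = index_scheduler.processing_tasks.read().unwrap();
+ //     if currently_processing.must_cancel_processing_tasks(&to_cancel) {
+ //         index_scheduler.must_stop_processing.must_stop();
+ //     }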
+ + #[test] + fn cancel_processing_task() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + + let _ = index_scheduler + .register(replace_document_import_task("catto", None, 0, documents_count0)) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task"); + + handle.advance_till([Start, BatchCreated, InsideProcessBatch]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "initial_task_processing"); + + index_scheduler + .register(KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0]), + }) + .unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_task_registered"); + // Now we check that we can reach the AbortedIndexation error handling + handle.advance_till([AbortedIndexation]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); + + // handle.advance_till([Start, BatchCreated, BeforeProcessing, AfterProcessing]); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); + } + + #[test] + fn cancel_mix_of_tasks() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let (file0, documents_count0) = sample_documents(&index_scheduler, 0, 0); + file0.persist().unwrap(); + let (file1, documents_count1) = sample_documents(&index_scheduler, 1, 1); + file1.persist().unwrap(); + let (file2, documents_count2) = sample_documents(&index_scheduler, 2, 2); + file2.persist().unwrap(); + + let to_enqueue = [ + replace_document_import_task("catto", None, 0, documents_count0), + replace_document_import_task("beavero", None, 1, documents_count1), + replace_document_import_task("wolfo", None, 2, documents_count2), + ]; + for task in to_enqueue { + let _ = index_scheduler.register(task).unwrap(); + index_scheduler.assert_internally_consistent(); + } + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "first_task_processed"); + + handle.advance_till([Start, BatchCreated, InsideProcessBatch]); + index_scheduler + .register(KindWithContent::TaskCancelation { + query: "test_query".to_owned(), + tasks: RoaringBitmap::from_iter([0, 1, 2]), + }) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processing_second_task_cancel_enqueued"); + + handle.advance_till([AbortedIndexation]); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "aborted_indexation"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed"); + } + + #[test] + fn test_document_replace() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } 
+ snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // everything should be batched together. + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // has everything being pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap()); + } + + #[test] + fn test_document_update() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // everything should be batched together. + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler)); + + // has everything being pushed successfully in milli? + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap()); + } + + #[test] + fn test_mixed_document_addition() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + for i in 0..10 { + let method = if i % 2 == 0 { UpdateDocuments } else { ReplaceDocuments }; + + let content = format!( + r#"{{ + "id": {}, + "doggo": "bob {}" + }}"#, + i, i + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap(); + let documents_count = meilisearch_types::document_formats::read_json( + content.as_bytes(), + file.as_file_mut(), + ) + .unwrap() as u64; + file.persist().unwrap(); + index_scheduler + .register(KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method, + content_file: uuid, + documents_count, + allow_index_creation: true, + }) + .unwrap(); + index_scheduler.assert_internally_consistent(); + } + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); + + // Only half of the task should've been processed since we can't autobatch replace and update together. + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); + + handle.advance_n_successful_batches(5); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); + + // has everything being pushed successfully in milli? 
+ let index = index_scheduler.index("doggos").unwrap();
+ let rtxn = index.read_txn().unwrap();
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+ let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+ let documents = index
+ .all_documents(&rtxn)
+ .unwrap()
+ .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+ .collect::<Vec<_>>();
+ snapshot!(serde_json::to_string_pretty(&documents).unwrap());
+ }
+
+ #[test]
+ fn test_document_replace_without_autobatching() {
+ let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]);
+
+ for i in 0..10 {
+ let content = format!(
+ r#"{{
+ "id": {},
+ "doggo": "bob {}"
+ }}"#,
+ i, i
+ );
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+ let documents_count = meilisearch_types::document_formats::read_json(
+ content.as_bytes(),
+ file.as_file_mut(),
+ )
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: ReplaceDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation: true,
+ })
+ .unwrap();
+ index_scheduler.assert_internally_consistent();
+ }
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks");
+
+ // Nothing should be batched, thus half of the tasks are processed.
+ handle.advance_n_successful_batches(5);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed");
+
+ // Everything is processed.
+ handle.advance_n_successful_batches(5);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
+
+ // Has everything been pushed successfully to milli?
+ let index = index_scheduler.index("doggos").unwrap();
+ let rtxn = index.read_txn().unwrap();
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+ let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+ let documents = index
+ .all_documents(&rtxn)
+ .unwrap()
+ .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+ .collect::<Vec<_>>();
+ snapshot!(serde_json::to_string_pretty(&documents).unwrap());
+ }
+
+ #[test]
+ fn test_document_update_without_autobatching() {
+ let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]);
+
+ for i in 0..10 {
+ let content = format!(
+ r#"{{
+ "id": {},
+ "doggo": "bob {}"
+ }}"#,
+ i, i
+ );
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+ let documents_count = meilisearch_types::document_formats::read_json(
+ content.as_bytes(),
+ file.as_file_mut(),
+ )
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: UpdateDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation: true,
+ })
+ .unwrap();
+ index_scheduler.assert_internally_consistent();
+ }
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks");
+
+ // Nothing should be batched, thus half of the tasks are processed.
+ handle.advance_n_successful_batches(5);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed");
+
+ // Everything is processed.
+ handle.advance_n_successful_batches(5);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
+
+ // Has everything been pushed successfully to milli?
+ let index = index_scheduler.index("doggos").unwrap();
+ let rtxn = index.read_txn().unwrap();
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+ let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+ let documents = index
+ .all_documents(&rtxn)
+ .unwrap()
+ .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+ .collect::<Vec<_>>();
+ snapshot!(serde_json::to_string_pretty(&documents).unwrap());
+ }
+
+ // Snapshots the `Debug` representation of a value through `meili_snap::snapshot!`.
+ #[macro_export]
+ macro_rules! debug_snapshot {
+ ($value:expr, @$snapshot:literal) => {{
+ let value = format!("{:?}", $value);
+ meili_snap::snapshot!(value, @$snapshot);
+ }};
+ }
+
+ #[test]
+ fn simple_new() {
+ crate::IndexScheduler::test(true, vec![]);
+ }
+
+ #[test]
+ fn query_tasks_from_and_limit() {
+ let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+ let kind = index_creation_task("doggo", "bone");
+ let _task = index_scheduler.register(kind).unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
+ let kind = index_creation_task("whalo", "plankton");
+ let _task = index_scheduler.register(kind).unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
+ let kind = index_creation_task("catto", "his_own_vomit");
+ let _task = index_scheduler.register(kind).unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_third_task");
+
+ handle.advance_n_successful_batches(3);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_all_tasks");
+
+ let rtxn = index_scheduler.env.read_txn().unwrap();
+ let query = Query { limit: Some(0), ..Default::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[]");
+
+ let query = Query { limit: Some(1), ..Default::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[2,]");
+
+ let query = Query { limit: Some(2), ..Default::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
+
+ let query = Query { from: Some(1), ..Default::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
+
+ let query = Query { from: Some(2), ..Default::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");
+
+ let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[1,]");
+
+ let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
+ }
+
+ #[test]
+ fn query_tasks_simple() {
+ let start_time = OffsetDateTime::now_utc();
+
+ let (index_scheduler, mut handle) =
+ IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
+
+ let kind = index_creation_task("catto", "mouse");
+ let _task = index_scheduler.register(kind).unwrap();
+ let kind = index_creation_task("doggo", "sheep");
+ let _task = index_scheduler.register(kind).unwrap();
+ let kind = index_creation_task("whalo", "fish");
+ let _task = index_scheduler.register(kind).unwrap();
+
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
+
+ handle.advance_till([Start, BatchCreated]);
+
+ let rtxn = index_scheduler.env.read_txn().unwrap();
+
+ let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick
+
+ let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick
+
+ let query = Query {
+ statuses: Some(vec![Status::Enqueued, Status::Processing]),
+ ..Default::default()
+ };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick
+
+ let query = Query {
+ statuses: Some(vec![Status::Enqueued, Status::Processing]),
+ after_started_at: Some(start_time),
+ ..Default::default()
+ };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ // both enqueued and processing tasks in the first tick, but limited to those with a started_at
+ // that comes after the start of the test, which should exclude the enqueued tasks
+ snapshot!(snapshot_bitmap(&tasks), @"[0,]");
+
+ let query = Query {
+ statuses: Some(vec![Status::Enqueued, Status::Processing]),
+ before_started_at: Some(start_time),
+ ..Default::default()
+ };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ // both enqueued and processing tasks in the first tick, but limited to those with a started_at
+ // that comes before the start of the test, which should exclude all of them
+ snapshot!(snapshot_bitmap(&tasks), @"[]");
+
+ let query = Query {
+ statuses: Some(vec![Status::Enqueued, Status::Processing]),
+ after_started_at: Some(start_time),
+ before_started_at: Some(start_time + Duration::minutes(1)),
+ ..Default::default()
+ };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ // both enqueued and processing tasks in the first tick, but limited to those with a started_at
+ // that comes after the start of the test and before one minute after the start of the test,
+ // which should exclude the enqueued tasks and include the only processing task
+ snapshot!(snapshot_bitmap(&tasks), @"[0,]");
+
+ handle.advance_till([
+ InsideProcessBatch,
+ InsideProcessBatch,
+ ProcessBatchSucceeded,
+ AfterProcessing,
+ Start,
+ BatchCreated,
+ ]);
+
+ let rtxn = index_scheduler.env.read_txn().unwrap();
+
+ let second_start_time = OffsetDateTime::now_utc();
+
+ let query = Query {
+ statuses: Some(vec![Status::Succeeded, Status::Processing]),
+ after_started_at: Some(start_time),
+ before_started_at: Some(start_time + Duration::minutes(1)),
+ ..Default::default()
+ };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ // both succeeded and processing tasks in the first tick, but limited to those with a started_at
+ // that comes after the start of the test and before one minute
after the start of the test, + // which should include all tasks + snapshot!(snapshot_bitmap(&tasks), @"[0,1,]"); + + let query = Query { + statuses: Some(vec![Status::Succeeded, Status::Processing]), + before_started_at: Some(start_time), + ..Default::default() + }; + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes before the start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // both succeeded and processing tasks in the first tick, but limited to those with a started_at + // that comes after the start of the second part of the test and before one minute after the + // second start of the test, which should exclude all tasks + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + // now we make one more batch, the started_at field of the new tasks will be past `second_start_time` + handle.advance_till([ + InsideProcessBatch, + InsideProcessBatch, + ProcessBatchSucceeded, + AfterProcessing, + Start, + BatchCreated, + ]); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // we run the same query to verify that, and indeed find that the last task is matched + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Enqueued, Status::Succeeded, Status::Processing]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // enqueued, succeeded, or processing tasks started after the second part of the test, should + // again only return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + handle.advance_till([ProcessBatchFailed, AfterProcessing]); + let rtxn = index_scheduler.read_txn().unwrap(); + + // now the last task should have failed + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end"); + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // so running the last query should return nothing + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // but the same query on failed tasks should return the last task + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // but the same query on failed tasks should return the last task + 
snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![1]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // same query but with an invalid uid + snapshot!(snapshot_bitmap(&tasks), @"[]"); + + let query = Query { + statuses: Some(vec![Status::Failed]), + uids: Some(vec![2]), + after_started_at: Some(second_start_time), + before_started_at: Some(second_start_time + Duration::minutes(1)), + ..Default::default() + }; + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // same query but with a valid uid + snapshot!(snapshot_bitmap(&tasks), @"[2,]"); + } + + #[test] + fn query_tasks_special_rules() { + let (index_scheduler, mut handle) = + IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]); + + let kind = index_creation_task("catto", "mouse"); + let _task = index_scheduler.register(kind).unwrap(); + let kind = index_creation_task("doggo", "sheep"); + let _task = index_scheduler.register(kind).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }], + }; + let _task = index_scheduler.register(kind).unwrap(); + let kind = KindWithContent::IndexSwap { + swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "whalo".to_owned()) }], + }; + let _task = index_scheduler.register(kind).unwrap(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start"); + + handle.advance_till([Start, BatchCreated]); + + let rtxn = index_scheduler.env.read_txn().unwrap(); + + let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() }; + let tasks = + index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap(); + // only the first task associated with catto is returned, the indexSwap tasks are excluded! 
+ snapshot!(snapshot_bitmap(&tasks), @"[0,]");
+
+ let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
+ let tasks = index_scheduler
+ .get_task_ids_from_authorized_indexes(&rtxn, &query, &Some(vec!["doggo".to_owned()]))
+ .unwrap();
+ // we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
+ // associated with doggo -> empty result
+ snapshot!(snapshot_bitmap(&tasks), @"[]");
+
+ let query = Query::default();
+ let tasks = index_scheduler
+ .get_task_ids_from_authorized_indexes(&rtxn, &query, &Some(vec!["doggo".to_owned()]))
+ .unwrap();
+ // we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
+ // -> only the index creation of doggo should be returned
+ snapshot!(snapshot_bitmap(&tasks), @"[1,]");
+
+ let query = Query::default();
+ let tasks = index_scheduler
+ .get_task_ids_from_authorized_indexes(
+ &rtxn,
+ &query,
+ &Some(vec!["catto".to_owned(), "doggo".to_owned()]),
+ )
+ .unwrap();
+ // we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
+ // -> all tasks except the swap of catto with whalo are returned
+ snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
+
+ let query = Query::default();
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ // we asked for all the tasks with all indexes authorized -> all tasks returned
+ snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]");
+ }
+
+ #[test]
+ fn query_tasks_canceled_by() {
+ let (index_scheduler, mut handle) =
+ IndexScheduler::test(true, vec![(3, FailureLocation::InsideProcessBatch)]);
+
+ let kind = index_creation_task("catto", "mouse");
+ let _ = index_scheduler.register(kind).unwrap();
+ let kind = index_creation_task("doggo", "sheep");
+ let _ = index_scheduler.register(kind).unwrap();
+ let kind = KindWithContent::IndexSwap {
+ swaps: vec![IndexSwap { indexes: ("catto".to_owned(), "doggo".to_owned()) }],
+ };
+ let _task = index_scheduler.register(kind).unwrap();
+
+ handle.advance_n_successful_batches(1);
+ let kind = KindWithContent::TaskCancelation {
+ query: "test_query".to_string(),
+ tasks: [0, 1, 2, 3].into_iter().collect(),
+ };
+ let task_cancelation = index_scheduler.register(kind).unwrap();
+ handle.advance_n_successful_batches(1);
+
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "start");
+
+ let rtxn = index_scheduler.read_txn().unwrap();
+ let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
+ let tasks =
+ index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
+ // 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the
+ // taskCancelation itself
+ snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
+
+ let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
+ let tasks = index_scheduler
+ .get_task_ids_from_authorized_indexes(&rtxn, &query, &Some(vec!["doggo".to_string()]))
+ .unwrap();
+ // Return only 1 because the user is not authorized to see task 2
+ snapshot!(snapshot_bitmap(&tasks), @"[1,]");
+ }
+
+ #[test]
+ fn fail_in_process_batch_for_index_creation() {
+ let (index_scheduler, mut handle) =
+ IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]);
+
+ let kind = index_creation_task("catto", "mouse");
+
+ let _task = index_scheduler.register(kind).unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_register");
+
+ handle.advance_one_failed_batch();
+
+ // Still in the first iteration
+ assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed");
+ }
+
+ #[test]
+ fn fail_in_process_batch_for_document_addition() {
+ let (index_scheduler, mut handle) =
+ IndexScheduler::test(true, vec![(1, FailureLocation::InsideProcessBatch)]);
+
+ let content = r#"
+ {
+ "id": 1,
+ "doggo": "bob"
+ }"#;
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
+ let documents_count =
+ meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut())
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: ReplaceDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation: true,
+ })
+ .unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
+ handle.advance_till([Start, BatchCreated]);
+
+ snapshot!(
+ snapshot_index_scheduler(&index_scheduler),
+ name: "document_addition_batch_created"
+ );
+
+ handle.advance_till([ProcessBatchFailed, AfterProcessing]);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_failed");
+ }
+
+ #[test]
+ fn fail_in_update_task_after_process_batch_success_for_document_addition() {
+ let (index_scheduler, mut handle) = IndexScheduler::test(
+ true,
+ vec![(1, FailureLocation::UpdatingTaskAfterProcessBatchSuccess { task_uid: 0 })],
+ );
+
+ let content = r#"
+ {
+ "id": 1,
+ "doggo": "bob"
+ }"#;
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
+ let documents_count =
+ meilisearch_types::document_formats::read_json(content.as_bytes(), file.as_file_mut())
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: ReplaceDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation: true,
+ })
+ .unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
+
+ handle.advance_till([Start]);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "document_addition_succeeded_but_index_scheduler_not_updated");
+
+ handle.advance_till([BatchCreated, InsideProcessBatch, ProcessBatchSucceeded]);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_batch_succeeded");
+
+ // At this point, the next time the scheduler tries to progress, it should encounter
+ // a critical failure and have to wait for 1s before retrying anything.
+
+ let before_failure = Instant::now();
+ handle.advance_till([Start]);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_to_commit");
+ let failure_duration = before_failure.elapsed();
+ assert!(failure_duration.as_millis() >= 1000);
+
+ handle.advance_till([
+ BatchCreated,
+ InsideProcessBatch,
+ ProcessBatchSucceeded,
+ AfterProcessing,
+ ]);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "task_successfully_processed");
+ }
+
+ #[test]
+ fn test_document_addition_cant_create_index_without_index() {
+ // We're going to autobatch multiple document additions that don't have
+ // the right to create an index while there is no index currently.
+ // Thus, everything should be batched together and an IndexDoesNotExists
+ // error should be thrown.
+ let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+ for i in 0..10 {
+ let content = format!(
+ r#"{{
+ "id": {},
+ "doggo": "bob {}"
+ }}"#,
+ i, i
+ );
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+ let documents_count = meilisearch_types::document_formats::read_json(
+ content.as_bytes(),
+ file.as_file_mut(),
+ )
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: ReplaceDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation: false,
+ })
+ .unwrap();
+ index_scheduler.assert_internally_consistent();
+ }
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks");
+
+ // Everything should be batched together.
+ handle.advance_till([
+ Start,
+ BatchCreated,
+ InsideProcessBatch,
+ ProcessBatchFailed,
+ AfterProcessing,
+ ]);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks");
+
+ // The index should not exist.
+ snapshot!(format!("{}", index_scheduler.index("doggos").map(|_| ()).unwrap_err()), @"Index `doggos` not found.");
+ }
+
+ #[test]
+ fn test_document_addition_cant_create_index_without_index_without_autobatching() {
+ // We're going to execute multiple document additions that don't have
+ // the right to create an index while there is no index currently.
+ // Since autobatching is disabled, every task should be processed
+ // sequentially and throw an IndexDoesNotExists.
+ let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]);
+
+ for i in 0..10 {
+ let content = format!(
+ r#"{{
+ "id": {},
+ "doggo": "bob {}"
+ }}"#,
+ i, i
+ );
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+ let documents_count = meilisearch_types::document_formats::read_json(
+ content.as_bytes(),
+ file.as_file_mut(),
+ )
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: ReplaceDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation: false,
+ })
+ .unwrap();
+ index_scheduler.assert_internally_consistent();
+ }
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks");
+
+ // Nothing should be batched, thus half of the tasks are processed.
+ handle.advance_n_failed_batches(5);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed");
+
+ // Everything is processed.
+ handle.advance_n_failed_batches(5);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
+
+ // The index should not exist.
+ snapshot!(format!("{}", index_scheduler.index("doggos").map(|_| ()).unwrap_err()), @"Index `doggos` not found.");
+ }
+
+ #[test]
+ fn test_document_addition_cant_create_index_with_index() {
+ // We're going to autobatch multiple document additions that don't have
+ // the right to create an index while there is already an index.
+ // Thus, everything should be batched together and no error should be
+ // thrown.
+ let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+ // Create the index.
+ index_scheduler
+ .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None })
+ .unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
+ handle.advance_one_successful_batch();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task");
+
+ for i in 0..10 {
+ let content = format!(
+ r#"{{
+ "id": {},
+ "doggo": "bob {}"
+ }}"#,
+ i, i
+ );
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+ let documents_count = meilisearch_types::document_formats::read_json(
+ content.as_bytes(),
+ file.as_file_mut(),
+ )
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: ReplaceDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation: false,
+ })
+ .unwrap();
+ index_scheduler.assert_internally_consistent();
+ }
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks");
+
+ // Everything should be batched together.
+ handle.advance_n_successful_batches(1);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_10_tasks");
+
+ // Has everything been pushed successfully to milli?
+ let index = index_scheduler.index("doggos").unwrap();
+ let rtxn = index.read_txn().unwrap();
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+ let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+ let documents = index
+ .all_documents(&rtxn)
+ .unwrap()
+ .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+ .collect::<Vec<_>>();
+ snapshot!(serde_json::to_string_pretty(&documents).unwrap());
+ }
+
+ #[test]
+ fn test_document_addition_cant_create_index_with_index_without_autobatching() {
+ // We're going to execute multiple document additions that don't have
+ // the right to create an index while the index already exists.
+ // Since autobatching is disabled, every task should be processed
+ // sequentially and succeed.
+ let (index_scheduler, mut handle) = IndexScheduler::test(false, vec![]);
+
+ // Create the index.
+ index_scheduler
+ .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None })
+ .unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
+ handle.advance_one_successful_batch();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task");
+
+ for i in 0..10 {
+ let content = format!(
+ r#"{{
+ "id": {},
+ "doggo": "bob {}"
+ }}"#,
+ i, i
+ );
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+ let documents_count = meilisearch_types::document_formats::read_json(
+ content.as_bytes(),
+ file.as_file_mut(),
+ )
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: ReplaceDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation: false,
+ })
+ .unwrap();
+ index_scheduler.assert_internally_consistent();
+ }
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks");
+
+ // Nothing should be batched, thus half of the tasks are processed.
+ handle.advance_n_successful_batches(5);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed");
+
+ // Everything is processed.
+ handle.advance_n_successful_batches(5);
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
+
+ // Has everything been pushed successfully to milli?
+ let index = index_scheduler.index("doggos").unwrap();
+ let rtxn = index.read_txn().unwrap();
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+ let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+ let documents = index
+ .all_documents(&rtxn)
+ .unwrap()
+ .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+ .collect::<Vec<_>>();
+ snapshot!(serde_json::to_string_pretty(&documents).unwrap());
+ }
+
+ #[test]
+ fn test_document_addition_mixed_rights_with_index() {
+ // We're going to autobatch multiple document additions.
+ // - The index already exists.
+ // - The first document addition doesn't have the right to create an index.
+ // Can it batch with the other ones?
+ let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+ // Create the index.
+ index_scheduler
+ .register(KindWithContent::IndexCreation { index_uid: S("doggos"), primary_key: None })
+ .unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
+ handle.advance_one_successful_batch();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "processed_the_first_task");
+
+ for i in 0..10 {
+ let content = format!(
+ r#"{{
+ "id": {},
+ "doggo": "bob {}"
+ }}"#,
+ i, i
+ );
+ let allow_index_creation = i % 2 != 0;
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+ let documents_count = meilisearch_types::document_formats::read_json(
+ content.as_bytes(),
+ file.as_file_mut(),
+ )
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: ReplaceDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation,
+ })
+ .unwrap();
+ index_scheduler.assert_internally_consistent();
+ }
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks");
+
+ // Everything should be batched together.
+ handle.advance_one_successful_batch();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
+
+ // Has everything been pushed successfully to milli?
+ let index = index_scheduler.index("doggos").unwrap();
+ let rtxn = index.read_txn().unwrap();
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+ let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+ let documents = index
+ .all_documents(&rtxn)
+ .unwrap()
+ .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+ .collect::<Vec<_>>();
+ snapshot!(serde_json::to_string_pretty(&documents).unwrap());
+ }
+
+ #[test]
+ fn test_document_addition_mixed_right_without_index_starts_with_cant_create() {
+ // We're going to autobatch multiple document additions.
+ // - The index does not exist.
+ // - The first document addition doesn't have the right to create an index.
+ // - The second one does. They should not batch together.
+ // - The second one should batch with everything else as it's going to create an index.
+ let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+ for i in 0..10 {
+ let content = format!(
+ r#"{{
+ "id": {},
+ "doggo": "bob {}"
+ }}"#,
+ i, i
+ );
+ let allow_index_creation = i % 2 != 0;
+
+ let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(i).unwrap();
+ let documents_count = meilisearch_types::document_formats::read_json(
+ content.as_bytes(),
+ file.as_file_mut(),
+ )
+ .unwrap() as u64;
+ file.persist().unwrap();
+ index_scheduler
+ .register(KindWithContent::DocumentAdditionOrUpdate {
+ index_uid: S("doggos"),
+ primary_key: Some(S("id")),
+ method: ReplaceDocuments,
+ content_file: uuid,
+ documents_count,
+ allow_index_creation,
+ })
+ .unwrap();
+ index_scheduler.assert_internally_consistent();
+ }
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks");
+
+ // A first batch should be processed with only the first documentAddition that's going to fail.
+ handle.advance_one_failed_batch();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "only_first_task_failed");
+
+ // Everything else should be batched together.
+ handle.advance_one_successful_batch();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed");
+
+ // Has everything been pushed successfully to milli?
+ let index = index_scheduler.index("doggos").unwrap();
+ let rtxn = index.read_txn().unwrap();
+ let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+ let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+ let documents = index
+ .all_documents(&rtxn)
+ .unwrap()
+ .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+ .collect::<Vec<_>>();
+ snapshot!(serde_json::to_string_pretty(&documents).unwrap());
+ }
+
+ #[test]
+ fn panic_in_process_batch_for_index_creation() {
+ let (index_scheduler, mut handle) =
+ IndexScheduler::test(true, vec![(1, FailureLocation::PanicInsideProcessBatch)]);
+
+ let kind = index_creation_task("catto", "mouse");
+
+ let _task = index_scheduler.register(kind).unwrap();
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
+
+ handle.advance_till([Start, BatchCreated, ProcessBatchFailed, AfterProcessing]);
+
+ // Still in the first iteration
+ assert_eq!(*index_scheduler.run_loop_iteration.read().unwrap(), 1);
+ // No matter what happens in process_batch, the index_scheduler should be internally consistent
+ snapshot!(snapshot_index_scheduler(&index_scheduler), name: "index_creation_failed");
+ }
+}
diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap
new file mode 100644
index 000000000..a06b82c74
--- /dev/null
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap
@@ -0,0 +1,46 @@
+---
+source: index-scheduler/src/lib.rs
+assertion_line: 1755
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
+1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query:
"test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [1,] +canceled [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: +1 [0,] + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap new file mode 100644 index 000000000..743e74a14 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap new file mode 100644 index 000000000..5c6078b51 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap @@ -0,0 +1,50 @@ +--- 
+source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[1,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,] +"taskCancelation" [3,] +---------------------------------------------------------------------- +### Index Tasks: +beavero [1,] +catto [0,] +wolfo [2,] +---------------------------------------------------------------------- +### Index Mapper: +["beavero", "catto"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap new file mode 100644 index 000000000..f67fff59f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap @@ -0,0 +1,55 @@ +--- +source: index-scheduler/src/lib.rs +assertion_line: 1859 +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 
00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,3,] +canceled [1,2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,] +"taskCancelation" [3,] +---------------------------------------------------------------------- +### Index Tasks: +beavero [1,] +catto [0,] +wolfo [2,] +---------------------------------------------------------------------- +### Index Mapper: +["beavero", "catto"] +---------------------------------------------------------------------- +### Canceled By: +3 [1,2,] + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap new file mode 100644 index 000000000..36d34ff93 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/first_task_processed.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,] 
+---------------------------------------------------------------------- +### Index Tasks: +beavero [1,] +catto [0,] +wolfo [2,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap new file mode 100644 index 000000000..30da295f9 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap @@ -0,0 +1,50 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[1,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,] +"taskCancelation" [3,] +---------------------------------------------------------------------- +### Index Tasks: +beavero [1,] +catto [0,] +wolfo [2,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] 
[0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap new file mode 100644 index 000000000..6074673e3 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap new file mode 100644 index 000000000..f2035c7fe --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +assertion_line: 1818 +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [1,] +canceled [0,] 
+---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: +1 [0,] + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap new file mode 100644 index 000000000..061f334c8 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap new file mode 100644 index 000000000..905cec451 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] 
+---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap new file mode 100644 index 000000000..d454b501e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/registered_the_first_task.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap new file mode 100644 index 000000000..b3842cc12 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap 
@@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"taskCancelation" [1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: +1 [] + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap new file mode 100644 index 000000000..e52a80fae --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/initial_task_processed.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] 
+---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap new file mode 100644 index 000000000..d454b501e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/registered_the_first_task.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap new file mode 100644 index 000000000..f9195857a --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/do_not_batch_task_of_different_indexes/all_tasks_processed.snap @@ -0,0 +1,62 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "cattos", primary_key: None }} +2 {uid: 2, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "girafos", primary_key: None }} +3 {uid: 3, status: succeeded, details: { deleted_documents: Some(0) }, kind: DocumentClear { index_uid: "doggos" }} +4 {uid: 4, status: succeeded, details: { deleted_documents: Some(0) }, kind: DocumentClear { index_uid: "cattos" }} +5 {uid: 5, status: succeeded, details: { deleted_documents: Some(0) }, kind: DocumentClear { index_uid: "girafos" }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,] +---------------------------------------------------------------------- +### Kind: 
+"documentDeletion" [3,4,5,] +"indexCreation" [0,1,2,] +---------------------------------------------------------------------- +### Index Tasks: +cattos [1,4,] +doggos [0,3,] +girafos [2,5,] +---------------------------------------------------------------------- +### Index Mapper: +["cattos", "doggos", "girafos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap new file mode 100644 index 000000000..3e654a0e2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/after_register.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap new file mode 100644 index 000000000..10291b206 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: 
enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap b/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap new file mode 100644 index 000000000..6079a4317 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition/once_everything_is_processed.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap new file mode 100644 index 000000000..379e90120 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/before_index_creation.snap @@ -0,0 +1,46 
@@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"indexCreation" [0,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap new file mode 100644 index 000000000..2ff82bfd2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/both_task_succeeded.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { deleted_documents: Some(0) }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"indexCreation" [0,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,] +---------------------------------------------------------------------- +### Index Mapper: +[] 
+---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap new file mode 100644 index 000000000..e23cd648f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap new file mode 100644 index 000000000..86674ccd0 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_second_task.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### 
Kind: +"documentAdditionOrUpdate" [1,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap new file mode 100644 index 000000000..f4d3a8190 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion/registered_the_third_task.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"indexCreation" [0,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap new file mode 100644 index 000000000..e0813f109 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/1.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### 
Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"indexDeletion" [1,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap new file mode 100644 index 000000000..f8586b7b8 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_index_deletion_on_unexisting_index/2.snap @@ -0,0 +1,42 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { deleted_documents: Some(0) }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +"indexDeletion" [1,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,] 
+---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap new file mode 100644 index 000000000..10291b206 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap new file mode 100644 index 000000000..c1bfd7db9 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] 
+---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap new file mode 100644 index 000000000..3e654a0e2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/registered_the_first_task.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap new file mode 100644 index 000000000..63a2d606e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/after_register.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] 
+---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap new file mode 100644 index 000000000..252ae082e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap new file mode 100644 index 000000000..bdda4e086 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: 
ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap new file mode 100644 index 000000000..bdda4e086 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap new file mode 100644 index 000000000..3e654a0e2 --- /dev/null +++ 
b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/document_addition_succeeded_but_index_scheduler_not_updated.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap new file mode 100644 index 000000000..3e654a0e2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/registered_the_first_task.snap @@ -0,0 +1,37 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### 
File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap new file mode 100644 index 000000000..6079a4317 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/task_successfully_processed.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap new file mode 100644 index 000000000..c75a3b87e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +index_a [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] 
+---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap new file mode 100644 index 000000000..656b06ad3 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +index_a [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap new file mode 100644 index 000000000..0cf82317b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }} +1 {uid: 1, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_b", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +index_a [0,] +index_b [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + 
+---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap new file mode 100644 index 000000000..8b73d12c2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap @@ -0,0 +1,42 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[0,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }} +1 {uid: 1, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_b", primary_key: Some("id") }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "index_a" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +index_a [0,2,] +index_b [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap b/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap new file mode 100644 index 000000000..60d8c4cdb --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/index_creation_failed.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "An unexpected crash occurred when processing the task.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 
+---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap new file mode 100644 index 000000000..63a2d606e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/panic_in_process_batch_for_index_creation/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap new file mode 100644 index 000000000..3a4705635 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_first_task.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: 
"cattos", primary_key: None }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +cattos [1,] +doggos [0,2,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap new file mode 100644 index 000000000..979ec8af6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_second_task.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "cattos", primary_key: None }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [2,] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +cattos [1,] +doggos [0,2,] +---------------------------------------------------------------------- +### Index Mapper: +["cattos", "doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap 
b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap new file mode 100644 index 000000000..c7190dd8b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/processed_the_third_task.snap @@ -0,0 +1,49 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "cattos", primary_key: None }} +2 {uid: 2, status: succeeded, details: { deleted_documents: Some(0) }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +cattos [1,] +doggos [0,2,] +---------------------------------------------------------------------- +### Index Mapper: +["cattos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap new file mode 100644 index 000000000..e23cd648f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished 
At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap new file mode 100644 index 000000000..82cc517cb --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_second_task.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "cattos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +cattos [1,] +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap new file mode 100644 index 000000000..76a6b3f08 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_inserted_without_new_signal/registered_the_third_task.snap @@ -0,0 +1,42 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "cattos", primary_key: None }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: IndexDeletion { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +"indexDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +cattos [1,] +doggos [0,2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: 
+ +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap new file mode 100644 index 000000000..fa09eba28 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/first.snap @@ -0,0 +1,46 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +3 {uid: 3, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentDeletion" [1,2,3,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap new file mode 100644 index 000000000..e52c36718 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/fourth.snap @@ -0,0 +1,52 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { deleted_documents: Some(0) }, kind: DocumentClear { index_uid: "doggos" }} +2 {uid: 2, status: succeeded, details: { deleted_documents: Some(0) }, kind: DocumentClear { index_uid: "doggos" }} +3 {uid: 3, status: succeeded, details: { 
deleted_documents: Some(0) }, kind: DocumentClear { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"documentDeletion" [1,2,3,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap new file mode 100644 index 000000000..52866bed6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap new file mode 100644 index 000000000..6ac8aa79f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_fourth_task.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- 
+### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +3 {uid: 3, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"documentDeletion" [1,2,3,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap new file mode 100644 index 000000000..32d32daaf --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_second_task.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentDeletion" [1,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap 
b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap new file mode 100644 index 000000000..75ceef14d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/registered_the_third_task.snap @@ -0,0 +1,41 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"documentDeletion" [1,2,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap new file mode 100644 index 000000000..4b1577aa6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/second.snap @@ -0,0 +1,48 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { deleted_documents: Some(0) }, kind: DocumentClear { index_uid: "doggos" }} +2 {uid: 2, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +3 {uid: 3, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [2,3,] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentDeletion" [1,2,3,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] 
[1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap new file mode 100644 index 000000000..2ac3b141f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/process_tasks_without_autobatching/third.snap @@ -0,0 +1,50 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { deleted_documents: Some(0) }, kind: DocumentClear { index_uid: "doggos" }} +2 {uid: 2, status: succeeded, details: { deleted_documents: Some(0) }, kind: DocumentClear { index_uid: "doggos" }} +3 {uid: 3, status: enqueued, details: { deleted_documents: None }, kind: DocumentClear { index_uid: "doggos" }} +---------------------------------------------------------------------- +### Status: +enqueued [3,] +succeeded [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"documentDeletion" [1,2,3,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap new file mode 100644 index 000000000..624606ba9 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap @@ -0,0 +1,53 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: canceled, canceled_by: 3, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }} +2 {uid: 2, status: canceled, canceled_by: 3, details: { swaps: [IndexSwap { indexes: 
("catto", "doggo") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("catto", "doggo") }] }} +3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,3,] +canceled [1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +"indexSwap" [2,] +"taskCancelation" [3,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,2,] +doggo [1,2,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: +3 [1,2,] + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap new file mode 100644 index 000000000..694bbff26 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/processed_all_tasks.snap @@ -0,0 +1,49 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("plankton") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("plankton") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("his_own_vomit") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("his_own_vomit") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,] +---------------------------------------------------------------------- +### Index Tasks: +catto [2,] +doggo [0,] +whalo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto", "doggo", "whalo"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] 
+---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap new file mode 100644 index 000000000..c1a0899cd --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggo [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap new file mode 100644 index 000000000..6daa6bce2 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_second_task.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} +1 {uid: 1, status: enqueued, details: { primary_key: Some("plankton") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("plankton") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +doggo [0,] +whalo [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: 
+---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap new file mode 100644 index 000000000..8427679e7 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_from_and_limit/registered_the_third_task.snap @@ -0,0 +1,42 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} +1 {uid: 1, status: enqueued, details: { primary_key: Some("plankton") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("plankton") }} +2 {uid: 2, status: enqueued, details: { primary_key: Some("his_own_vomit") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("his_own_vomit") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,] +---------------------------------------------------------------------- +### Index Tasks: +catto [2,] +doggo [0,] +whalo [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap new file mode 100644 index 000000000..65838db64 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap @@ -0,0 +1,50 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }} +2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +failed [2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,] 
+---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +whalo [2,] +---------------------------------------------------------------------- +### Index Mapper: +["catto", "doggo"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap new file mode 100644 index 000000000..aed5aed8c --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/start.snap @@ -0,0 +1,42 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }} +2 {uid: 2, status: enqueued, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +whalo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap b/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap new file mode 100644 index 000000000..2bb4f7590 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_special_rules/start.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, 
details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }} +2 {uid: 2, status: enqueued, details: { swaps: [IndexSwap { indexes: ("catto", "doggo") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("catto", "doggo") }] }} +3 {uid: 3, status: enqueued, details: { swaps: [IndexSwap { indexes: ("catto", "whalo") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("catto", "whalo") }] }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,] +"indexSwap" [2,3,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,2,3,] +doggo [1,2,] +whalo [3,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/register/everything_is_succesfully_registered.snap b/index-scheduler/src/snapshots/lib.rs/register/everything_is_succesfully_registered.snap new file mode 100644 index 000000000..360752bc6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/register/everything_is_succesfully_registered.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 12, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 12, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 50, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 50, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 5000, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 5000, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,2,] +doggo [3,] +---------------------------------------------------------------------- +### Index Mapper: 
+[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap new file mode 100644 index 000000000..2c009ef1a --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_a.snap @@ -0,0 +1,48 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +2 {uid: 2, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,] +b [1,] +c [2,] +d [3,] +---------------------------------------------------------------------- +### Index Mapper: +["a"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap new file mode 100644 index 000000000..6d6e89c5f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_b.snap @@ -0,0 +1,50 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", 
primary_key: Some("id") }} +2 {uid: 2, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [2,3,] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,] +b [1,] +c [2,] +d [3,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap new file mode 100644 index 000000000..c12334ecf --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_c.snap @@ -0,0 +1,52 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [3,] +succeeded [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,] +b [1,] +c [2,] +d [3,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### File Store: + 
+---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap new file mode 100644 index 000000000..b20b3b320 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/create_d.snap @@ -0,0 +1,54 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,] +b [1,] +c [2,] +d [3,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap new file mode 100644 index 000000000..17e8936f0 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_processed.snap @@ -0,0 +1,61 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +4 {uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b") }, 
IndexSwap { indexes: ("c", "d") }] }} +5 {uid: 5, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "c") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c") }] }} +---------------------------------------------------------------------- +### Status: +enqueued [5,] +succeeded [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +"indexSwap" [4,5,] +---------------------------------------------------------------------- +### Index Tasks: +a [1,4,5,] +b [0,4,] +c [3,4,5,] +d [2,4,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap new file mode 100644 index 000000000..f2c74f676 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/first_swap_registered.snap @@ -0,0 +1,57 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +4 {uid: 4, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }} +---------------------------------------------------------------------- +### Status: +enqueued [4,] +succeeded [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +"indexSwap" [4,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,4,] +b [1,4,] +c [2,4,] +d [3,4,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] 
+---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap new file mode 100644 index 000000000..acfbc4c77 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/second_swap_processed.snap @@ -0,0 +1,63 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +4 {uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b") }, IndexSwap { indexes: ("a", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b") }, IndexSwap { indexes: ("a", "d") }] }} +5 {uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c") }] }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +"indexSwap" [4,5,] +---------------------------------------------------------------------- +### Index Tasks: +a [3,4,5,] +b [0,4,] +c [1,4,5,] +d [2,4,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap new file mode 100644 index 000000000..c7c6faae6 --- /dev/null +++ 
b/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap @@ -0,0 +1,67 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +4 {uid: 4, status: succeeded, details: { swaps: [IndexSwap { indexes: ("c", "b") }, IndexSwap { indexes: ("a", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("c", "b") }, IndexSwap { indexes: ("a", "d") }] }} +5 {uid: 5, status: succeeded, details: { swaps: [IndexSwap { indexes: ("a", "c") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c") }] }} +6 {uid: 6, status: succeeded, details: { swaps: [] }, kind: IndexSwap { swaps: [] }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +"indexSwap" [4,5,6,] +---------------------------------------------------------------------- +### Index Tasks: +a [3,4,5,] +b [0,4,] +c [1,4,5,] +d [2,4,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap new file mode 100644 index 000000000..0f8355f25 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes/two_swaps_registered.snap @@ -0,0 +1,59 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: 
Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +4 {uid: 4, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "d") }] }} +5 {uid: 5, status: enqueued, details: { swaps: [IndexSwap { indexes: ("a", "c") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "c") }] }} +---------------------------------------------------------------------- +### Status: +enqueued [4,5,] +succeeded [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +"indexSwap" [4,5,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,4,5,] +b [1,4,] +c [2,4,5,] +d [3,4,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap new file mode 100644 index 000000000..b20b3b320 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/after_the_index_creation.snap @@ -0,0 +1,54 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,] +b [1,] +c [2,] +d [3,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Canceled By: + 
+---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap new file mode 100644 index 000000000..fd9790835 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/first_swap_failed.snap @@ -0,0 +1,62 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Indexes `e`, `f` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "e") }, IndexSwap { indexes: ("d", "f") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("a", "b") }, IndexSwap { indexes: ("c", "e") }, IndexSwap { indexes: ("d", "f") }] }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,] +failed [4,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +"indexSwap" [4,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,4,] +b [1,4,] +c [2,4,] +d [3,4,] +e [4,] +f [4,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git 
a/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap new file mode 100644 index 000000000..b20b3b320 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/swap_indexes_errors/initial_tasks_processed.snap @@ -0,0 +1,54 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "a", primary_key: Some("id") }} +1 {uid: 1, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "b", primary_key: Some("id") }} +2 {uid: 2, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "c", primary_key: Some("id") }} +3 {uid: 3, status: succeeded, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "d", primary_key: Some("id") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,1,2,3,] +---------------------------------------------------------------------- +### Index Tasks: +a [0,] +b [1,] +c [2,] +d [3,] +---------------------------------------------------------------------- +### Index Mapper: +["a", "b", "c", "d"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap new file mode 100644 index 000000000..fc37dcf2d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_enqueued.snap @@ -0,0 +1,41 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### 
Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap new file mode 100644 index 000000000..e4c4d9d7e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/initial_tasks_processed.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap new file mode 100644 index 000000000..8874cc9e0 --- 
/dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} +3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [2,3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"taskDeletion" [2,3,] +---------------------------------------------------------------------- +### Index Tasks: +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap new file mode 100644 index 000000000..3c3bd754e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap @@ -0,0 +1,46 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { matched_tasks: 1, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} 
+---------------------------------------------------------------------- +### Status: +enqueued [1,2,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,] +"taskDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap new file mode 100644 index 000000000..fc37dcf2d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_enqueued.snap @@ -0,0 +1,41 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap new file 
mode 100644 index 000000000..e4c4d9d7e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/initial_tasks_processed.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,] +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap new file mode 100644 index 000000000..29c251027 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"taskDeletion" [2,] +---------------------------------------------------------------------- +### Index Tasks: +doggo [1,] +---------------------------------------------------------------------- +### Index Mapper: +["catto"] 
+---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [2,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap new file mode 100644 index 000000000..afb8af39c --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/initial_tasks_enqueued.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,] +doggo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap new file mode 100644 index 000000000..6fc0a4f7c --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap @@ -0,0 +1,50 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] 
+---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,] +succeeded [3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"indexCreation" [0,] +"taskDeletion" [3,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,] +doggo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [3,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [3,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap new file mode 100644 index 000000000..e2ad01246 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, 
details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"indexCreation" [0,] +"taskDeletion" [3,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,] +doggo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap new file mode 100644 index 000000000..8017f77b9 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap @@ -0,0 +1,47 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[3,] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"indexCreation" [0,] +"taskDeletion" [3,] +---------------------------------------------------------------------- +### Index Tasks: +catto [0,1,] +doggo [2,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] 
+---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/1.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/1.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap new file mode 100644 index 000000000..d112c8145 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_processing_the_10_tasks.snap @@ -0,0 +1,62 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false 
}} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap new file mode 100644 index 000000000..7daafcccb --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/after_registering_the_10_tasks.snap @@ -0,0 +1,70 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, 
status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,4,5,6,7,8,9,10,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] 
+---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap new file mode 100644 index 000000000..ed265ac6e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/processed_the_first_task.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap new file mode 100644 index 000000000..e23cd648f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching 
Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/1.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/1.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap new file mode 100644 index 000000000..83f17bcef --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/after_registering_the_10_tasks.snap @@ -0,0 +1,70 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} 
+3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,4,5,6,7,8,9,10,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] 
+---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap new file mode 100644 index 000000000..fc2fdc5f1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/all_tasks_processed.snap @@ -0,0 +1,80 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: succeeded, details: { received_documents: 
1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap new file mode 100644 index 000000000..48f972785 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/five_tasks_processed.snap @@ -0,0 +1,75 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 
00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [6,7,8,9,10,] +succeeded [0,1,2,3,4,5,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] 
+[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap new file mode 100644 index 000000000..6214f3139 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/processed_the_first_task.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap new file mode 100644 index 000000000..52866bed6 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_with_index_without_autobatching/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: 
"doggos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap new file mode 100644 index 000000000..ed28c121b --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_processing_the_10_tasks.snap @@ -0,0 +1,57 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: 
ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] 
+---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap new file mode 100644 index 000000000..828d4dafc --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index/after_registering_the_10_tasks.snap @@ -0,0 +1,64 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 
00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap new file mode 100644 index 000000000..671713c8e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/after_registering_the_10_tasks.snap @@ -0,0 +1,64 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, 
allow_index_creation: false }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] 
+---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap new file mode 100644 index 000000000..d995cab9e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/all_tasks_processed.snap @@ -0,0 +1,75 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", 
error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [] +failed [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + 
+---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap new file mode 100644 index 000000000..3ae875bff --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_cant_create_index_without_index_without_autobatching/five_tasks_processed.snap @@ -0,0 +1,70 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", 
primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: false }} +---------------------------------------------------------------------- +### Status: +enqueued [5,6,7,8,9,] +failed [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 
+00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/1.snap new file mode 100644 index 000000000..cbd8d175a --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/1.snap @@ -0,0 +1,41 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap new file mode 100644 index 000000000..ad5968b58 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/after_registering_the_10_tasks.snap @@ -0,0 +1,64 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: 
enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap new file mode 100644 index 000000000..19ee47359 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/all_tasks_processed.snap @@ -0,0 +1,60 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: 
"index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [1,2,3,4,5,6,7,8,9,] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] 
+---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap new file mode 100644 index 000000000..ed57bc4e3 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_right_without_index_starts_with_cant_create/only_first_task_failed.snap @@ -0,0 +1,66 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 
00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,4,5,6,7,8,9,] +failed [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/1.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/1.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, 
+ "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap new file mode 100644 index 000000000..61b7f3016 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/after_registering_the_10_tasks.snap @@ -0,0 +1,70 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, 
allow_index_creation: false }} +10 {uid: 10, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,2,3,4,5,6,7,8,9,10,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap new file mode 100644 index 000000000..0962dcdf5 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/all_tasks_processed.snap @@ -0,0 +1,62 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: false }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, 
content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: false }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: false }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: false }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: false }} +10 {uid: 10, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,3,4,5,6,7,8,9,10,] +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +[timestamp] [10,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,2,3,4,5,6,7,8,9,10,] +---------------------------------------------------------------------- +### File Store: + 
+---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap new file mode 100644 index 000000000..ed265ac6e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/processed_the_first_task.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap new file mode 100644 index 000000000..e23cd648f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_addition_mixed_rights_with_index/registered_the_first_task.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { primary_key: None }, kind: IndexCreation { index_uid: "doggos", primary_key: None }} +---------------------------------------------------------------------- +### Status: +enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"indexCreation" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + 
diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap new file mode 100644 index 000000000..a47ef319f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace/1.snap @@ -0,0 +1,64 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### 
Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap new file mode 100644 index 000000000..f6423719c --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace/2.snap @@ -0,0 +1,57 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, 
indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace/3.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace/3.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/1.snap 
b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/1.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/1.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap new file mode 100644 index 000000000..0f52c9664 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/after_registering_the_10_tasks.snap @@ -0,0 +1,64 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { 
index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap new file mode 100644 index 000000000..b80b8bb40 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/all_tasks_processed.snap @@ -0,0 +1,75 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, 
status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] 
+---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap new file mode 100644 index 000000000..b1528c103 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_replace_without_autobatching/five_tasks_processed.snap @@ -0,0 +1,70 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: 
DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [5,6,7,8,9,] +succeeded [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap new file mode 100644 index 000000000..6157fb454 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update/1.snap @@ -0,0 +1,64 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: 
DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git 
a/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap new file mode 100644 index 000000000..736f998d0 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update/2.snap @@ -0,0 +1,57 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} 
+---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update/3.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update/3.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update/3.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/1.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/1.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/1.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap new file mode 100644 index 000000000..85fda1a43 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/after_registering_the_10_tasks.snap @@ -0,0 +1,64 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: 
DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] 
[1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap new file mode 100644 index 000000000..a1fc55210 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/all_tasks_processed.snap @@ -0,0 +1,75 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 
1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap new file mode 100644 index 000000000..fb0b629ec --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_document_update_without_autobatching/five_tasks_processed.snap @@ -0,0 +1,70 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = false +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", 
primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [5,6,7,8,9,] +succeeded [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] 
+[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/1.snap b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/1.snap new file mode 100644 index 000000000..5a839838d --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/1.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 0, + "doggo": "bob 0" + }, + { + "id": 1, + "doggo": "bob 1" + }, + { + "id": 2, + "doggo": "bob 2" + }, + { + "id": 3, + "doggo": "bob 3" + }, + { + "id": 4, + "doggo": "bob 4" + }, + { + "id": 5, + "doggo": "bob 5" + }, + { + "id": 6, + "doggo": "bob 6" + }, + { + "id": 7, + "doggo": "bob 7" + }, + { + "id": 8, + "doggo": "bob 8" + }, + { + "id": 9, + "doggo": "bob 9" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap new file mode 100644 index 000000000..330a3318e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/after_registering_the_10_tasks.snap @@ -0,0 +1,64 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", 
primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +[] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 +00000000-0000-0000-0000-000000000001 +00000000-0000-0000-0000-000000000002 +00000000-0000-0000-0000-000000000003 +00000000-0000-0000-0000-000000000004 +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap new file mode 100644 index 000000000..20fda049f --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/all_tasks_processed.snap @@ -0,0 +1,75 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: 
Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + 
+---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap new file mode 100644 index 000000000..9fd990aa9 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/test_mixed_document_addition/five_tasks_processed.snap @@ -0,0 +1,70 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} +4 {uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, 
content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [5,6,7,8,9,] +succeeded [0,1,2,3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,6,7,8,9,] +---------------------------------------------------------------------- +### Index Mapper: +["doggos"] +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +[timestamp] [6,] +[timestamp] [7,] +[timestamp] [8,] +[timestamp] [9,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000005 +00000000-0000-0000-0000-000000000006 +00000000-0000-0000-0000-000000000007 +00000000-0000-0000-0000-000000000008 +00000000-0000-0000-0000-000000000009 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs new file mode 100644 index 000000000..a193c2bec --- /dev/null +++ b/index-scheduler/src/utils.rs @@ -0,0 +1,511 @@ +//! Utility functions on the DBs. Mainly getter and setters. 
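+//! +//! These helpers read and update the auxiliary task indexes (the status, kind, and per-index bitmaps, plus the enqueued/started/finished timestamp databases) so that they stay consistent with the main `all_tasks` database; see `update_task` and the `*_task_datetime` functions below.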
+ +use std::collections::{BTreeSet, HashSet}; +use std::ops::Bound; + +use meilisearch_types::heed::types::{DecodeIgnore, OwnedType}; +use meilisearch_types::heed::{Database, RoTxn, RwTxn}; +use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32}; +use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status}; +use roaring::{MultiOps, RoaringBitmap}; +use time::OffsetDateTime; + +use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128}; + +impl IndexScheduler { + pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> { + enum_iterator::all().map(|s| self.get_status(rtxn, s)).union() + } + + pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> { + Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1)) + } + + pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> { + Ok(self.last_task_id(rtxn)?.unwrap_or_default()) + } + + pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> { + Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?) + } + + /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a + /// `CorruptedTaskQueue` error will be thrown. + pub(crate) fn get_existing_tasks( + &self, + rtxn: &RoTxn, + tasks: impl IntoIterator<Item = u32>, + ) -> Result<Vec<Task>> { + tasks + .into_iter() + .map(|task_id| { + self.get_task(rtxn, task_id).and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) + }) + .collect::<Result<_>>() + } + + pub(crate) fn update_task(&self, wtxn: &mut RwTxn, task: &Task) -> Result<()> { + let old_task = self.get_task(wtxn, task.uid)?.ok_or(Error::CorruptedTaskQueue)?; + + debug_assert_eq!(old_task.uid, task.uid); + + if old_task == *task { + return Ok(()); + } + + if old_task.status != task.status { + self.update_status(wtxn, old_task.status, |bitmap| { + bitmap.remove(task.uid); + })?; + self.update_status(wtxn, task.status, |bitmap| { + bitmap.insert(task.uid); + })?; + } + + if old_task.kind.as_kind() != task.kind.as_kind() { + self.update_kind(wtxn, old_task.kind.as_kind(), |bitmap| { + bitmap.remove(task.uid); + })?; + self.update_kind(wtxn, task.kind.as_kind(), |bitmap| { + bitmap.insert(task.uid); + })?; + } + + assert_eq!( + old_task.enqueued_at, task.enqueued_at, + "Cannot update a task's enqueued_at time" + ); + if old_task.started_at != task.started_at { + assert!(old_task.started_at.is_none(), "Cannot update a task's started_at time"); + if let Some(started_at) = task.started_at { + insert_task_datetime(wtxn, self.started_at, started_at, task.uid)?; + } + } + if old_task.finished_at != task.finished_at { + assert!(old_task.finished_at.is_none(), "Cannot update a task's finished_at time"); + if let Some(finished_at) = task.finished_at { + insert_task_datetime(wtxn, self.finished_at, finished_at, task.uid)?; + } + } + + self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?; + Ok(()) + } + + /// Returns the whole set of tasks that belong to this index.
+ pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> { + Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default()) + } + + pub(crate) fn update_index( + &self, + wtxn: &mut RwTxn, + index: &str, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.index_tasks(wtxn, index)?; + f(&mut tasks); + if tasks.is_empty() { + self.index_tasks.delete(wtxn, index)?; + } else { + self.index_tasks.put(wtxn, index, &tasks)?; + } + + Ok(()) + } + + pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> { + Ok(self.status.get(rtxn, &status)?.unwrap_or_default()) + } + + pub(crate) fn put_status( + &self, + wtxn: &mut RwTxn, + status: Status, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.status.put(wtxn, &status, bitmap)?) + } + + pub(crate) fn update_status( + &self, + wtxn: &mut RwTxn, + status: Status, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_status(wtxn, status)?; + f(&mut tasks); + self.put_status(wtxn, status, &tasks)?; + + Ok(()) + } + + pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> { + Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default()) + } + + pub(crate) fn put_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + bitmap: &RoaringBitmap, + ) -> Result<()> { + Ok(self.kind.put(wtxn, &kind, bitmap)?) + } + + pub(crate) fn update_kind( + &self, + wtxn: &mut RwTxn, + kind: Kind, + f: impl Fn(&mut RoaringBitmap), + ) -> Result<()> { + let mut tasks = self.get_kind(wtxn, kind)?; + f(&mut tasks); + self.put_kind(wtxn, kind, &tasks)?; + + Ok(()) + } +} + +pub(crate) fn insert_task_datetime( + wtxn: &mut RwTxn, + database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>, + time: OffsetDateTime, + task_id: TaskId, +) -> Result<()> { + let timestamp = BEI128::new(time.unix_timestamp_nanos()); + let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default(); + task_ids.insert(task_id); + database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?; + Ok(()) +} + +pub(crate) fn remove_task_datetime( + wtxn: &mut RwTxn, + database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>, + time: OffsetDateTime, + task_id: TaskId, +) -> Result<()> { + let timestamp = BEI128::new(time.unix_timestamp_nanos()); + if let Some(mut existing) = database.get(wtxn, &timestamp)?
{ + existing.remove(task_id); + if existing.is_empty() { + database.delete(wtxn, &timestamp)?; + } else { + database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(existing))?; + } + } + + Ok(()) +} + +pub(crate) fn keep_tasks_within_datetimes( + rtxn: &RoTxn, + tasks: &mut RoaringBitmap, + database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>, + after: Option<OffsetDateTime>, + before: Option<OffsetDateTime>, +) -> Result<()> { + let (start, end) = match (&after, &before) { + (None, None) => return Ok(()), + (None, Some(before)) => (Bound::Unbounded, Bound::Excluded(*before)), + (Some(after), None) => (Bound::Excluded(*after), Bound::Unbounded), + (Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)), + }; + let mut collected_task_ids = RoaringBitmap::new(); + let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos())); + let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos())); + let iter = database.range(rtxn, &(start, end))?; + for r in iter { + let (_timestamp, task_ids) = r?; + collected_task_ids |= task_ids; + } + *tasks &= collected_task_ids; + Ok(()) +} + +// TODO: remove when Bound::map ( https://github.com/rust-lang/rust/issues/86026 ) is available on stable +pub(crate) fn map_bound<T, U>(bound: Bound<T>, map: impl FnOnce(T) -> U) -> Bound<U> { + match bound { + Bound::Included(x) => Bound::Included(map(x)), + Bound::Excluded(x) => Bound::Excluded(map(x)), + Bound::Unbounded => Bound::Unbounded, + } +} + +pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) { + use KindWithContent as K; + let mut index_uids = vec![]; + match &mut task.kind { + K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid), + K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid), + K::DocumentClear { index_uid } => index_uids.push(index_uid), + K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid), + K::IndexDeletion { index_uid } => index_uids.push(index_uid), + K::IndexCreation { index_uid, .. } => index_uids.push(index_uid), + K::IndexUpdate { index_uid, .. } => index_uids.push(index_uid), + K::IndexSwap { swaps } => { + for IndexSwap { indexes: (lhs, rhs) } in swaps.iter_mut() { + if lhs == swap.0 || lhs == swap.1 { + index_uids.push(lhs); + } + if rhs == swap.0 || rhs == swap.1 { + index_uids.push(rhs); + } + } + } + K::TaskCancelation { .. } + | K::TaskDeletion { .. } + | K::DumpCreation { .. } + | K::SnapshotCreation => (), + }; + if let Some(Details::IndexSwap { swaps }) = &mut task.details { + for IndexSwap { indexes: (lhs, rhs) } in swaps.iter_mut() { + if lhs == swap.0 || lhs == swap.1 { + index_uids.push(lhs); + } + if rhs == swap.0 || rhs == swap.1 { + index_uids.push(rhs); + } + } + } + for index_uid in index_uids { + if index_uid == swap.0 { + *index_uid = swap.1.to_owned(); + } else if index_uid == swap.1 { + *index_uid = swap.0.to_owned(); + } + } +} + +/// Remove references to task ids that are greater than the id of the given task. +pub(crate) fn filter_out_references_to_newer_tasks(task: &mut Task) { + let new_nbr_of_matched_tasks = match &mut task.kind { + KindWithContent::TaskCancelation { tasks, .. } + | KindWithContent::TaskDeletion { tasks, .. } => { + tasks.remove_range(task.uid..); + tasks.len() + } + _ => return, + }; + if let Some( + Details::TaskCancelation { matched_tasks, .. } + | Details::TaskDeletion { matched_tasks, ..
}, + ) = &mut task.details + { + *matched_tasks = new_nbr_of_matched_tasks; + } +} + +pub(crate) fn check_index_swap_validity(task: &Task) -> Result<()> { + let swaps = + if let KindWithContent::IndexSwap { swaps } = &task.kind { swaps } else { return Ok(()) }; + let mut all_indexes = HashSet::new(); + let mut duplicate_indexes = BTreeSet::new(); + for IndexSwap { indexes: (lhs, rhs) } in swaps { + for name in [lhs, rhs] { + let is_new = all_indexes.insert(name); + if !is_new { + duplicate_indexes.insert(name); + } + } + } + if !duplicate_indexes.is_empty() { + if duplicate_indexes.len() == 1 { + return Err(Error::SwapDuplicateIndexFound( + duplicate_indexes.into_iter().next().unwrap().clone(), + )); + } else { + return Err(Error::SwapDuplicateIndexesFound( + duplicate_indexes.into_iter().cloned().collect(), + )); + } + } + Ok(()) +} + +#[cfg(test)] +impl IndexScheduler { + /// Asserts that the index scheduler's content is internally consistent. + pub fn assert_internally_consistent(&self) { + let rtxn = self.env.read_txn().unwrap(); + for task in self.all_tasks.iter(&rtxn).unwrap() { + let (task_id, task) = task.unwrap(); + let task_id = task_id.get(); + + let task_index_uid = task.index_uid().map(ToOwned::to_owned); + + let Task { + uid, + enqueued_at, + started_at, + finished_at, + error: _, + canceled_by, + details, + status, + kind, + } = task; + assert_eq!(uid, task.uid); + if let Some(task_index_uid) = &task_index_uid { + assert!(self + .index_tasks + .get(&rtxn, task_index_uid.as_str()) + .unwrap() + .unwrap() + .contains(task.uid)); + } + let db_enqueued_at = self + .enqueued_at + .get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos())) + .unwrap() + .unwrap(); + assert!(db_enqueued_at.contains(task_id)); + if let Some(started_at) = started_at { + let db_started_at = self + .started_at + .get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos())) + .unwrap() + .unwrap(); + assert!(db_started_at.contains(task_id)); + } + if let Some(finished_at) = finished_at { + let db_finished_at = self + .finished_at + .get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos())) + .unwrap() + .unwrap(); + assert!(db_finished_at.contains(task_id)); + } + if let Some(canceled_by) = canceled_by { + let db_canceled_tasks = self.get_status(&rtxn, Status::Canceled).unwrap(); + assert!(db_canceled_tasks.contains(uid)); + let db_canceling_task = self.get_task(&rtxn, canceled_by).unwrap().unwrap(); + assert_eq!(db_canceling_task.status, Status::Succeeded); + match db_canceling_task.kind { + KindWithContent::TaskCancelation { query: _, tasks } => { + assert!(tasks.contains(uid)); + } + _ => panic!(), + } + } + if let Some(details) = details { + match details { + Details::IndexSwap { swaps: sw1 } => { + if let KindWithContent::IndexSwap { swaps: sw2 } = &kind { + assert_eq!(&sw1, sw2); + } + } + Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => { + assert_eq!(kind.as_kind(), Kind::DocumentAdditionOrUpdate); + match indexed_documents { + Some(0) => assert_ne!(status, Status::Enqueued), + Some(indexed_documents) => { + assert_eq!(status, Status::Succeeded); + assert!(indexed_documents <= received_documents); + } + None => { + assert_ne!(status, Status::Succeeded); + assert_ne!(status, Status::Canceled); + assert_ne!(status, Status::Failed); + } + } + } + Details::SettingsUpdate { settings: _ } => { + assert_eq!(kind.as_kind(), Kind::SettingsUpdate); + } + Details::IndexInfo { primary_key: pk1 } => match &kind { + KindWithContent::IndexCreation { index_uid, primary_key: pk2 
} + | KindWithContent::IndexUpdate { index_uid, primary_key: pk2 } => { + self.index_tasks + .get(&rtxn, index_uid.as_str()) + .unwrap() + .unwrap() + .contains(uid); + assert_eq!(&pk1, pk2); + } + _ => panic!(), + }, + Details::DocumentDeletion { + provided_ids: received_document_ids, + deleted_documents, + } => { + if let Some(deleted_documents) = deleted_documents { + assert_eq!(status, Status::Succeeded); + assert!(deleted_documents <= received_document_ids as u64); + assert_eq!(kind.as_kind(), Kind::DocumentDeletion); + + match &kind { + KindWithContent::DocumentDeletion { index_uid, documents_ids } => { + assert_eq!(&task_index_uid.unwrap(), index_uid); + assert!(documents_ids.len() >= received_document_ids); + } + _ => panic!(), + } + } else { + assert_ne!(status, Status::Succeeded); + } + } + Details::ClearAll { deleted_documents } => { + assert!(matches!( + kind.as_kind(), + Kind::DocumentDeletion | Kind::IndexDeletion + )); + if deleted_documents.is_some() { + assert_eq!(status, Status::Succeeded); + } else { + assert_ne!(status, Status::Succeeded); + } + } + Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => { + if let Some(canceled_tasks) = canceled_tasks { + assert_eq!(status, Status::Succeeded); + assert!(canceled_tasks <= matched_tasks); + match &kind { + KindWithContent::TaskCancelation { query, tasks } => { + assert_eq!(query, &original_filter); + assert_eq!(tasks.len(), matched_tasks); + } + _ => panic!(), + } + } else { + assert_ne!(status, Status::Succeeded); + } + } + Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => { + if let Some(deleted_tasks) = deleted_tasks { + assert_eq!(status, Status::Succeeded); + assert!(deleted_tasks <= matched_tasks); + match &kind { + KindWithContent::TaskDeletion { query, tasks } => { + assert_eq!(query, &original_filter); + assert_eq!(tasks.len(), matched_tasks); + } + _ => panic!(), + } + } else { + assert_ne!(status, Status::Succeeded); + } + } + Details::Dump { dump_uid: _ } => { + assert_eq!(kind.as_kind(), Kind::DumpCreation); + } + } + } + + assert!(self.get_status(&rtxn, status).unwrap().contains(uid)); + assert!(self.get_kind(&rtxn, kind.as_kind()).unwrap().contains(uid)); + + if let KindWithContent::DocumentAdditionOrUpdate { content_file, .. 
} = kind { + match status { + Status::Enqueued | Status::Processing => { + assert!(self.file_store.__all_uuids().contains(&content_file)); + } + Status::Succeeded | Status::Failed | Status::Canceled => { + assert!(!self.file_store.__all_uuids().contains(&content_file)); + } + } + } + } + } +} diff --git a/meili-snap/Cargo.toml b/meili-snap/Cargo.toml new file mode 100644 index 000000000..6706bf693 --- /dev/null +++ b/meili-snap/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "meili-snap" +version = "0.30.0" +edition = "2021" + +[dependencies] +insta = { version = "^1.19.1", features = ["json", "redactions"] } +md5 = "0.7.0" +once_cell = "1.15" diff --git a/meili-snap/src/lib.rs b/meili-snap/src/lib.rs new file mode 100644 index 000000000..a2abd0cea --- /dev/null +++ b/meili-snap/src/lib.rs @@ -0,0 +1,276 @@ +use std::borrow::Cow; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Mutex; + +pub use insta; +use once_cell::sync::Lazy; + +static SNAPSHOT_NAMES: Lazy<Mutex<HashMap<PathBuf, usize>>> = Lazy::new(Mutex::default); + +/// Return the md5 hash of the given string +pub fn hash_snapshot(snap: &str) -> String { + let hash = md5::compute(snap.as_bytes()); + let hash_str = format!("{hash:x}"); + hash_str +} + +#[track_caller] +pub fn default_snapshot_settings_for_test<'a>( + test_name: &str, + name: Option<&'a str>, +) -> (insta::Settings, Cow<'a, str>, bool) { + let mut settings = insta::Settings::clone_current(); + settings.set_prepend_module_to_snapshot(false); + let path = Path::new(std::panic::Location::caller().file()); + let filename = path.file_name().unwrap().to_str().unwrap(); + settings.set_omit_expression(true); + + let test_name = test_name.strip_suffix("::{{closure}}").unwrap_or(test_name); + let test_name = test_name.rsplit("::").next().unwrap().to_owned(); + + let path = Path::new("snapshots").join(filename).join(&test_name); + settings.set_snapshot_path(path.clone()); + let snap_name = if let Some(name) = name { + Cow::Borrowed(name) + } else { + let mut snapshot_names = SNAPSHOT_NAMES.lock().unwrap(); + let counter = snapshot_names.entry(path).or_default(); + *counter += 1; + Cow::Owned(format!("{counter}")) + }; + + let store_whole_snapshot = + std::env::var("MEILI_TEST_FULL_SNAPS").unwrap_or_else(|_| "false".to_owned()); + let store_whole_snapshot: bool = store_whole_snapshot.parse().unwrap(); + + (settings, snap_name, store_whole_snapshot) +} + +/** +Create a hashed snapshot test. + +## Arguments: + +1. The content of the snapshot. It is an expression whose result implements the `fmt::Display` trait. +2. `name: <name>`: the identifier for the snapshot test (optional) +3. `@""` to write the hash of the snapshot inline + +## Behaviour +The content of the snapshot will be saved both in full and as a hash. The full snapshot will +be saved with the name `<name>.full.snap` but will not be saved to the git repository. The hashed +snapshot will be saved inline. If `<name>` is not specified, then a global counter is used to give an +identifier to the snapshot. + +Running `cargo test` will check whether the old snapshot is identical to the +current one. If they are equal, the test passes. Otherwise, the test fails. + +Use the command line `cargo insta` to approve or reject new snapshots.
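+ +Note that the `<name>.full.snap` file is only written when the `MEILI_TEST_FULL_SNAPS` environment variable is set to `true` (see `default_snapshot_settings_for_test` above); by default only the inline hash is checked.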
+ +## Example +```ignore +// The full snapshot is saved under 1.full.snap and contains `10` +snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820"); +// The full snapshot is saved under snap_name.full.snap and contains `hello world` +snapshot_hash!("hello world", name: "snap_name", @"5f93f983524def3dca464469d2cf9f3e"); +``` +*/ +#[macro_export] +macro_rules! snapshot_hash { + ($value:expr, @$inline:literal) => { + let test_name = { + fn f() {} + fn type_name_of_val<T>(_: T) -> &'static str { + std::any::type_name::<T>() + } + type_name_of_val(f).strip_suffix("::f").unwrap_or("") + }; + let test_name = test_name + .strip_suffix("::{{closure}}") + .unwrap_or(test_name); + + let (settings, snap_name, store_whole_snapshot) = $crate::default_snapshot_settings_for_test(test_name, None); + settings.bind(|| { + let snap = format!("{}", $value); + let hash_snap = $crate::hash_snapshot(&snap); + meili_snap::insta::assert_snapshot!(hash_snap, @$inline); + if store_whole_snapshot { + meili_snap::insta::assert_snapshot!(format!("{}.full", snap_name), snap); + } + }); + }; + ($value:expr, name: $name:expr, @$inline:literal) => { + let test_name = { + fn f() {} + fn type_name_of_val<T>(_: T) -> &'static str { + std::any::type_name::<T>() + } + type_name_of_val(f).strip_suffix("::f").unwrap_or("") + }; + + let snap_name = format!("{}", $name); + let (settings, snap_name, store_whole_snapshot) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name)); + settings.bind(|| { + let snap = format!("{}", $value); + let hash_snap = $crate::hash_snapshot(&snap); + meili_snap::insta::assert_snapshot!(hash_snap, @$inline); + if store_whole_snapshot { + meili_snap::insta::assert_snapshot!(format!("{}.full", snap_name), snap); + } + }); + }; +} + +/** +Create a snapshot test. + +## Arguments: +1. The content of the snapshot. It is an expression whose result implements the `fmt::Display` trait. +2. Optionally one of: + 1. `name: <name>`: the identifier for the snapshot test + 2. `@""` to write the snapshot inline + +## Behaviour +The content of the snapshot will be saved in full with the given name +or using a global counter to give it an identifier. + +Running `cargo test` will check whether the old snapshot is identical to the +current one. If they are equal, the test passes. Otherwise, the test fails. + +Use the command line `cargo insta` to approve or reject new snapshots. + +## Example +```ignore +// The full snapshot is saved under 1.snap and contains `10` +snapshot!(10); +// The full snapshot is saved under snap_name.snap and contains `hello world` +snapshot!("hello world", name: "snap_name"); +// The full snapshot is saved inline +snapshot!(format!("{:?}", vec![1, 2]), @"[1, 2]"); +``` +*/ +#[macro_export] +macro_rules! 
snapshot { + ($value:expr, name: $name:expr) => { + let test_name = { + fn f() {} + fn type_name_of_val(_: T) -> &'static str { + std::any::type_name::() + } + type_name_of_val(f).strip_suffix("::f").unwrap_or("") + }; + let test_name = test_name + .strip_suffix("::{{closure}}") + .unwrap_or(test_name); + + let snap_name = format!("{}", $name); + let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name)); + settings.bind(|| { + let snap = format!("{}", $value); + meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap); + }); + }; + ($value:expr, @$inline:literal) => { + // Note that the name given as argument does not matter since it is only an inline snapshot + // We don't pass None because otherwise `meili-snap` will try to assign it a unique identifier + let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument")); + settings.bind(|| { + let snap = format!("{}", $value); + meili_snap::insta::assert_snapshot!(snap, @$inline); + }); + }; + ($value:expr) => { + let test_name = { + fn f() {} + fn type_name_of_val(_: T) -> &'static str { + std::any::type_name::() + } + type_name_of_val(f).strip_suffix("::f").unwrap_or("") + }; + let test_name = test_name + .strip_suffix("::{{closure}}") + .unwrap_or(test_name); + + let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None); + settings.bind(|| { + let snap = format!("{}", $value); + meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap); + }); + }; +} + +#[cfg(test)] +mod tests { + use crate as meili_snap; + #[test] + fn snap() { + snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820"); + snapshot_hash!(20, @"98f13708210194c475687be6106a3b84"); + snapshot_hash!(30, @"34173cb38f07f89ddbebc2ac9128303f"); + + snapshot!(40, @"40"); + snapshot!(50, @"50"); + snapshot!(60, @"60"); + + snapshot!(70); + snapshot!(80); + snapshot!(90); + + snapshot!(100, name: "snap_name_1"); + snapshot_hash!(110, name: "snap_name_2", @"5f93f983524def3dca464469d2cf9f3e"); + + snapshot!(120); + snapshot!(format!("{:?}", vec![1, 2]), @"[1, 2]"); + } + + // Currently the name of this module is not part of the snapshot path + // It does not bother me, but maybe it is worth changing later on. + mod snap { + use crate as meili_snap; + #[test] + fn some_test() { + snapshot_hash!(10, @"d3d9446802a44259755d38e6d163e820"); + snapshot_hash!(20, @"98f13708210194c475687be6106a3b84"); + snapshot_hash!(30, @"34173cb38f07f89ddbebc2ac9128303f"); + + snapshot!(40, @"40"); + snapshot!(50, @"50"); + snapshot!(60, @"60"); + + snapshot!(70); + snapshot!(80); + snapshot!(90); + + snapshot!(100, name: "snap_name_1"); + snapshot_hash!(110, name: "snap_name_2", @"5f93f983524def3dca464469d2cf9f3e"); + + snapshot!(120); + + // snapshot_hash!("", name: "", @"d41d8cd98f00b204e9800998ecf8427e"); + } + } +} + +/// Create a string from the value by serializing it as Json, optionally +/// redacting some parts of it. +/// +/// The second argument to the macro can be an object expression for redaction. +/// It's in the form { selector => replacement }. For more information about redactions +/// refer to the redactions feature in the `insta` guide. +#[macro_export] +macro_rules! 
json_string { + ($value:expr, {$($k:expr => $v:expr),*$(,)?}) => { + { + let (_, snap) = meili_snap::insta::_prepare_snapshot_for_redaction!($value, {$($k => $v),*}, Json, File); + snap + } + }; + ($value:expr) => {{ + let value = meili_snap::insta::_macro_support::serialize_value( + &$value, + meili_snap::insta::_macro_support::SerializationFormat::Json, + meili_snap::insta::_macro_support::SnapshotLocation::File + ); + value + }}; +} diff --git a/meili-snap/src/snapshots/lib.rs/snap/4.snap b/meili-snap/src/snapshots/lib.rs/snap/4.snap new file mode 100644 index 000000000..5d0878f16 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/4.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +70 diff --git a/meili-snap/src/snapshots/lib.rs/snap/5.snap b/meili-snap/src/snapshots/lib.rs/snap/5.snap new file mode 100644 index 000000000..ea547b823 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/5.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +80 diff --git a/meili-snap/src/snapshots/lib.rs/snap/6.snap b/meili-snap/src/snapshots/lib.rs/snap/6.snap new file mode 100644 index 000000000..e91bbe6f7 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/6.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +90 diff --git a/meili-snap/src/snapshots/lib.rs/snap/7.snap b/meili-snap/src/snapshots/lib.rs/snap/7.snap new file mode 100644 index 000000000..5ae6bb922 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/7.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +120 diff --git a/meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap b/meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap new file mode 100644 index 000000000..3964679e6 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/snap/snap_name_1.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +100 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/4.snap b/meili-snap/src/snapshots/lib.rs/some_test/4.snap new file mode 100644 index 000000000..5d0878f16 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/4.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +70 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/5.snap b/meili-snap/src/snapshots/lib.rs/some_test/5.snap new file mode 100644 index 000000000..ea547b823 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/5.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +80 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/6.snap b/meili-snap/src/snapshots/lib.rs/some_test/6.snap new file mode 100644 index 000000000..e91bbe6f7 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/6.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +90 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/7.snap b/meili-snap/src/snapshots/lib.rs/some_test/7.snap new file mode 100644 index 000000000..5ae6bb922 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/7.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +120 diff --git a/meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap b/meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap new file mode 100644 index 000000000..3964679e6 --- /dev/null +++ b/meili-snap/src/snapshots/lib.rs/some_test/snap_name_1.snap @@ -0,0 +1,4 @@ +--- +source: meili-snap/src/lib.rs +--- +100 diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index e8bd9bd63..f0b73b539 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -1,17 +1,17 @@ [package] name = 
"meilisearch-auth" -version = "0.29.2" +version = "0.30.0" edition = "2021" [dependencies] -enum-iterator = "0.7.0" +enum-iterator = "1.1.3" hmac = "0.12.1" meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.5" } -rand = "0.8.4" -serde = { version = "1.0.136", features = ["derive"] } +rand = "0.8.5" +roaring = { version = "0.10.0", features = ["serde"] } +serde = { version = "1.0.145", features = ["derive"] } serde_json = { version = "1.0.85", features = ["preserve_order"] } -sha2 = "0.10.2" -thiserror = "1.0.30" -time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } +sha2 = "0.10.6" +thiserror = "1.0.37" +time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] } uuid = { version = "1.1.2", features = ["serde", "v4"] } diff --git a/meilisearch-auth/src/action.rs b/meilisearch-auth/src/action.rs deleted file mode 100644 index 7c6a2e50c..000000000 --- a/meilisearch-auth/src/action.rs +++ /dev/null @@ -1,134 +0,0 @@ -use enum_iterator::IntoEnumIterator; -use serde::{Deserialize, Serialize}; -use std::hash::Hash; - -#[derive(IntoEnumIterator, Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash)] -#[repr(u8)] -pub enum Action { - #[serde(rename = "*")] - All = 0, - #[serde(rename = "search")] - Search, - #[serde(rename = "documents.*")] - DocumentsAll, - #[serde(rename = "documents.add")] - DocumentsAdd, - #[serde(rename = "documents.get")] - DocumentsGet, - #[serde(rename = "documents.delete")] - DocumentsDelete, - #[serde(rename = "indexes.*")] - IndexesAll, - #[serde(rename = "indexes.create")] - IndexesAdd, - #[serde(rename = "indexes.get")] - IndexesGet, - #[serde(rename = "indexes.update")] - IndexesUpdate, - #[serde(rename = "indexes.delete")] - IndexesDelete, - #[serde(rename = "tasks.*")] - TasksAll, - #[serde(rename = "tasks.get")] - TasksGet, - #[serde(rename = "settings.*")] - SettingsAll, - #[serde(rename = "settings.get")] - SettingsGet, - #[serde(rename = "settings.update")] - SettingsUpdate, - #[serde(rename = "stats.*")] - StatsAll, - #[serde(rename = "stats.get")] - StatsGet, - #[serde(rename = "metrics.*")] - MetricsAll, - #[serde(rename = "metrics.get")] - MetricsGet, - #[serde(rename = "dumps.*")] - DumpsAll, - #[serde(rename = "dumps.create")] - DumpsCreate, - #[serde(rename = "version")] - Version, - #[serde(rename = "keys.create")] - KeysAdd, - #[serde(rename = "keys.get")] - KeysGet, - #[serde(rename = "keys.update")] - KeysUpdate, - #[serde(rename = "keys.delete")] - KeysDelete, -} - -impl Action { - pub const fn from_repr(repr: u8) -> Option { - use actions::*; - match repr { - ALL => Some(Self::All), - SEARCH => Some(Self::Search), - DOCUMENTS_ALL => Some(Self::DocumentsAll), - DOCUMENTS_ADD => Some(Self::DocumentsAdd), - DOCUMENTS_GET => Some(Self::DocumentsGet), - DOCUMENTS_DELETE => Some(Self::DocumentsDelete), - INDEXES_ALL => Some(Self::IndexesAll), - INDEXES_CREATE => Some(Self::IndexesAdd), - INDEXES_GET => Some(Self::IndexesGet), - INDEXES_UPDATE => Some(Self::IndexesUpdate), - INDEXES_DELETE => Some(Self::IndexesDelete), - TASKS_ALL => Some(Self::TasksAll), - TASKS_GET => Some(Self::TasksGet), - SETTINGS_ALL => Some(Self::SettingsAll), - SETTINGS_GET => Some(Self::SettingsGet), - SETTINGS_UPDATE => Some(Self::SettingsUpdate), - STATS_ALL => Some(Self::StatsAll), - STATS_GET => Some(Self::StatsGet), - METRICS_ALL => Some(Self::MetricsAll), - METRICS_GET => Some(Self::MetricsGet), - 
DUMPS_ALL => Some(Self::DumpsAll), - DUMPS_CREATE => Some(Self::DumpsCreate), - VERSION => Some(Self::Version), - KEYS_CREATE => Some(Self::KeysAdd), - KEYS_GET => Some(Self::KeysGet), - KEYS_UPDATE => Some(Self::KeysUpdate), - KEYS_DELETE => Some(Self::KeysDelete), - _otherwise => None, - } - } - - pub const fn repr(&self) -> u8 { - *self as u8 - } -} - -pub mod actions { - use super::Action::*; - - pub(crate) const ALL: u8 = All.repr(); - pub const SEARCH: u8 = Search.repr(); - pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr(); - pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr(); - pub const DOCUMENTS_GET: u8 = DocumentsGet.repr(); - pub const DOCUMENTS_DELETE: u8 = DocumentsDelete.repr(); - pub const INDEXES_ALL: u8 = IndexesAll.repr(); - pub const INDEXES_CREATE: u8 = IndexesAdd.repr(); - pub const INDEXES_GET: u8 = IndexesGet.repr(); - pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr(); - pub const INDEXES_DELETE: u8 = IndexesDelete.repr(); - pub const TASKS_ALL: u8 = TasksAll.repr(); - pub const TASKS_GET: u8 = TasksGet.repr(); - pub const SETTINGS_ALL: u8 = SettingsAll.repr(); - pub const SETTINGS_GET: u8 = SettingsGet.repr(); - pub const SETTINGS_UPDATE: u8 = SettingsUpdate.repr(); - pub const STATS_ALL: u8 = StatsAll.repr(); - pub const STATS_GET: u8 = StatsGet.repr(); - pub const METRICS_ALL: u8 = MetricsAll.repr(); - pub const METRICS_GET: u8 = MetricsGet.repr(); - pub const DUMPS_ALL: u8 = DumpsAll.repr(); - pub const DUMPS_CREATE: u8 = DumpsCreate.repr(); - pub const VERSION: u8 = Version.repr(); - pub const KEYS_CREATE: u8 = KeysAdd.repr(); - pub const KEYS_GET: u8 = KeysGet.repr(); - pub const KEYS_UPDATE: u8 = KeysUpdate.repr(); - pub const KEYS_DELETE: u8 = KeysDelete.repr(); -} diff --git a/meilisearch-auth/src/dump.rs b/meilisearch-auth/src/dump.rs index 7e607e574..0b26bf7da 100644 --- a/meilisearch-auth/src/dump.rs +++ b/meilisearch-auth/src/dump.rs @@ -1,10 +1,9 @@ -use serde_json::Deserializer; - use std::fs::File; -use std::io::BufReader; -use std::io::Write; +use std::io::{BufReader, Write}; use std::path::Path; +use serde_json::Deserializer; + use crate::{AuthController, HeedAuthStore, Result}; const KEYS_PATH: &str = "keys"; diff --git a/meilisearch-auth/src/error.rs b/meilisearch-auth/src/error.rs index bb96be789..37d3dce60 100644 --- a/meilisearch-auth/src/error.rs +++ b/meilisearch-auth/src/error.rs @@ -1,41 +1,24 @@ use std::error::Error; use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::internal_error; -use serde_json::Value; +use meilisearch_types::{internal_error, keys}; pub type Result = std::result::Result; #[derive(Debug, thiserror::Error)] pub enum AuthControllerError { - #[error("`{0}` field is mandatory.")] - MissingParameter(&'static str), - #[error("`actions` field value `{0}` is invalid. It should be an array of string representing action names.")] - InvalidApiKeyActions(Value), - #[error("`indexes` field value `{0}` is invalid. It should be an array of string representing index names.")] - InvalidApiKeyIndexes(Value), - #[error("`expiresAt` field value `{0}` is invalid. It should follow the RFC 3339 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.")] - InvalidApiKeyExpiresAt(Value), - #[error("`description` field value `{0}` is invalid. It should be a string or specified as a null value.")] - InvalidApiKeyDescription(Value), - #[error( - "`name` field value `{0}` is invalid. It should be a string or specified as a null value." 
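
Editor's note: the deleted `action.rs` above (its contents move to `meilisearch-types::keys`) pairs a `#[repr(u8)]` enum with a module of `const u8` discriminants, so the same byte can be persisted in the key store and reused as a match pattern in `from_repr`. Below is a stand-alone sketch of that round trip with a made-up variant list, not the real Meilisearch action set.

```rust
// Sketch of the repr(u8) round-trip pattern used by `Action`; the variants are illustrative.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
enum Op {
    All = 0,
    Search,
    DocumentsAdd,
}

mod ops {
    use super::Op::*;
    // `repr()` is a const fn, so the discriminants can back `const` items...
    pub const ALL: u8 = All.repr();
    pub const SEARCH: u8 = Search.repr();
    pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr();
}

impl Op {
    pub const fn repr(&self) -> u8 {
        *self as u8
    }

    // ...and those consts can then serve as match-arm patterns here.
    pub const fn from_repr(repr: u8) -> Option<Self> {
        match repr {
            ops::ALL => Some(Self::All),
            ops::SEARCH => Some(Self::Search),
            ops::DOCUMENTS_ADD => Some(Self::DocumentsAdd),
            _ => None,
        }
    }
}

fn main() {
    let stored = Op::Search.repr(); // what would be written to storage
    assert_eq!(Op::from_repr(stored), Some(Op::Search));
    assert_eq!(Op::from_repr(250), None);
}
```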
- )] - InvalidApiKeyName(Value), - #[error("`uid` field value `{0}` is invalid. It should be a valid UUID v4 string or omitted.")] - InvalidApiKeyUid(Value), #[error("API key `{0}` not found.")] ApiKeyNotFound(String), #[error("`uid` field value `{0}` is already an existing API key.")] ApiKeyAlreadyExists(String), - #[error("The `{0}` field cannot be modified for the given resource.")] - ImmutableField(String), + #[error(transparent)] + ApiKey(#[from] keys::Error), #[error("Internal error: {0}")] Internal(Box), } internal_error!( - AuthControllerError: milli::heed::Error, + AuthControllerError: meilisearch_types::milli::heed::Error, std::io::Error, serde_json::Error, std::str::Utf8Error @@ -44,16 +27,9 @@ internal_error!( impl ErrorCode for AuthControllerError { fn error_code(&self) -> Code { match self { - Self::MissingParameter(_) => Code::MissingParameter, - Self::InvalidApiKeyActions(_) => Code::InvalidApiKeyActions, - Self::InvalidApiKeyIndexes(_) => Code::InvalidApiKeyIndexes, - Self::InvalidApiKeyExpiresAt(_) => Code::InvalidApiKeyExpiresAt, - Self::InvalidApiKeyDescription(_) => Code::InvalidApiKeyDescription, - Self::InvalidApiKeyName(_) => Code::InvalidApiKeyName, + Self::ApiKey(e) => e.error_code(), Self::ApiKeyNotFound(_) => Code::ApiKeyNotFound, - Self::InvalidApiKeyUid(_) => Code::InvalidApiKeyUid, Self::ApiKeyAlreadyExists(_) => Code::ApiKeyAlreadyExists, - Self::ImmutableField(_) => Code::ImmutableField, Self::Internal(_) => Code::Internal, } } diff --git a/meilisearch-auth/src/key.rs b/meilisearch-auth/src/key.rs deleted file mode 100644 index eb72aaa72..000000000 --- a/meilisearch-auth/src/key.rs +++ /dev/null @@ -1,201 +0,0 @@ -use crate::action::Action; -use crate::error::{AuthControllerError, Result}; -use crate::store::KeyId; - -use meilisearch_types::index_uid::IndexUid; -use meilisearch_types::star_or::StarOr; -use serde::{Deserialize, Serialize}; -use serde_json::{from_value, Value}; -use time::format_description::well_known::Rfc3339; -use time::macros::{format_description, time}; -use time::{Date, OffsetDateTime, PrimitiveDateTime}; -use uuid::Uuid; - -#[derive(Debug, Deserialize, Serialize)] -pub struct Key { - #[serde(skip_serializing_if = "Option::is_none")] - pub description: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub name: Option, - pub uid: KeyId, - pub actions: Vec, - pub indexes: Vec>, - #[serde(with = "time::serde::rfc3339::option")] - pub expires_at: Option, - #[serde(with = "time::serde::rfc3339")] - pub created_at: OffsetDateTime, - #[serde(with = "time::serde::rfc3339")] - pub updated_at: OffsetDateTime, -} - -impl Key { - pub fn create_from_value(value: Value) -> Result { - let name = match value.get("name") { - None | Some(Value::Null) => None, - Some(des) => from_value(des.clone()) - .map(Some) - .map_err(|_| AuthControllerError::InvalidApiKeyName(des.clone()))?, - }; - - let description = match value.get("description") { - None | Some(Value::Null) => None, - Some(des) => from_value(des.clone()) - .map(Some) - .map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone()))?, - }; - - let uid = value.get("uid").map_or_else( - || Ok(Uuid::new_v4()), - |uid| { - from_value(uid.clone()) - .map_err(|_| AuthControllerError::InvalidApiKeyUid(uid.clone())) - }, - )?; - - let actions = value - .get("actions") - .map(|act| { - from_value(act.clone()) - .map_err(|_| AuthControllerError::InvalidApiKeyActions(act.clone())) - }) - .ok_or(AuthControllerError::MissingParameter("actions"))??; - - let indexes = value - .get("indexes") 
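
Editor's note: the reworked `AuthControllerError` above stops restating every key-validation message and instead wraps `keys::Error` with `#[error(transparent)]` and `#[from]`, then forwards the error code with `Self::ApiKey(e) => e.error_code()`. The snippet below is a hedged, generic sketch of that delegation pattern with thiserror; `InnerError` and `OuterError` are stand-ins, not the real meilisearch-types API.

```rust
// Illustrative only: the `transparent` + `#[from]` delegation pattern.
use thiserror::Error;

#[derive(Debug, Error)]
enum InnerError {
    #[error("`{0}` field is mandatory.")]
    MissingParameter(&'static str),
}

#[derive(Debug, Error)]
enum OuterError {
    // Display and source are forwarded verbatim to the wrapped error,
    // and `#[from]` lets `?` convert InnerError into OuterError automatically.
    #[error(transparent)]
    Inner(#[from] InnerError),
    #[error("Internal error: {0}")]
    Internal(String),
}

fn validate(has_actions: bool) -> Result<(), OuterError> {
    if !has_actions {
        // The inner error is wrapped without duplicating its message.
        return Err(InnerError::MissingParameter("actions").into());
    }
    Ok(())
}

fn main() {
    let err = validate(false).unwrap_err();
    // Prints the inner message unchanged: "`actions` field is mandatory."
    println!("{err}");
}
```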
- .map(|ind| { - from_value(ind.clone()) - .map_err(|_| AuthControllerError::InvalidApiKeyIndexes(ind.clone())) - }) - .ok_or(AuthControllerError::MissingParameter("indexes"))??; - - let expires_at = value - .get("expiresAt") - .map(parse_expiration_date) - .ok_or(AuthControllerError::MissingParameter("expiresAt"))??; - - let created_at = OffsetDateTime::now_utc(); - let updated_at = created_at; - - Ok(Self { - name, - description, - uid, - actions, - indexes, - expires_at, - created_at, - updated_at, - }) - } - - pub fn update_from_value(&mut self, value: Value) -> Result<()> { - if let Some(des) = value.get("description") { - let des = from_value(des.clone()) - .map_err(|_| AuthControllerError::InvalidApiKeyDescription(des.clone())); - self.description = des?; - } - - if let Some(des) = value.get("name") { - let des = from_value(des.clone()) - .map_err(|_| AuthControllerError::InvalidApiKeyName(des.clone())); - self.name = des?; - } - - if value.get("uid").is_some() { - return Err(AuthControllerError::ImmutableField("uid".to_string())); - } - - if value.get("actions").is_some() { - return Err(AuthControllerError::ImmutableField("actions".to_string())); - } - - if value.get("indexes").is_some() { - return Err(AuthControllerError::ImmutableField("indexes".to_string())); - } - - if value.get("expiresAt").is_some() { - return Err(AuthControllerError::ImmutableField("expiresAt".to_string())); - } - - if value.get("createdAt").is_some() { - return Err(AuthControllerError::ImmutableField("createdAt".to_string())); - } - - if value.get("updatedAt").is_some() { - return Err(AuthControllerError::ImmutableField("updatedAt".to_string())); - } - - self.updated_at = OffsetDateTime::now_utc(); - - Ok(()) - } - - pub(crate) fn default_admin() -> Self { - let now = OffsetDateTime::now_utc(); - let uid = Uuid::new_v4(); - Self { - name: Some("Default Admin API Key".to_string()), - description: Some("Use it for anything that is not a search operation. Caution! Do not expose it on a public frontend".to_string()), - uid, - actions: vec![Action::All], - indexes: vec![StarOr::Star], - expires_at: None, - created_at: now, - updated_at: now, - } - } - - pub(crate) fn default_search() -> Self { - let now = OffsetDateTime::now_utc(); - let uid = Uuid::new_v4(); - Self { - name: Some("Default Search API Key".to_string()), - description: Some("Use it to search from the frontend".to_string()), - uid, - actions: vec![Action::Search], - indexes: vec![StarOr::Star], - expires_at: None, - created_at: now, - updated_at: now, - } - } -} - -fn parse_expiration_date(value: &Value) -> Result> { - match value { - Value::String(string) => OffsetDateTime::parse(string, &Rfc3339) - .or_else(|_| { - PrimitiveDateTime::parse( - string, - format_description!( - "[year repr:full base:calendar]-[month repr:numerical]-[day]T[hour]:[minute]:[second]" - ), - ).map(|datetime| datetime.assume_utc()) - }) - .or_else(|_| { - PrimitiveDateTime::parse( - string, - format_description!( - "[year repr:full base:calendar]-[month repr:numerical]-[day] [hour]:[minute]:[second]" - ), - ).map(|datetime| datetime.assume_utc()) - }) - .or_else(|_| { - Date::parse(string, format_description!( - "[year repr:full base:calendar]-[month repr:numerical]-[day]" - )).map(|date| PrimitiveDateTime::new(date, time!(00:00)).assume_utc()) - }) - .map_err(|_| AuthControllerError::InvalidApiKeyExpiresAt(value.clone())) - // check if the key is already expired. 
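
Editor's note: the `parse_expiration_date` helper being deleted here (it moves to `meilisearch-types` along with the rest of `key.rs`) accepts an RFC 3339 timestamp, a `YYYY-MM-DDTHH:MM:SS` or `YYYY-MM-DD HH:MM:SS` datetime assumed to be UTC, or a bare `YYYY-MM-DD` date that expires at midnight, and it rejects values already in the past. Below is a condensed, self-contained sketch of that fallback chain using the `time` 0.3 crate with the same feature flags as in the Cargo.toml above; it is a simplification, not the exact helper.

```rust
// Condensed sketch of the expiration-date fallback chain shown in this hunk.
use time::format_description::well_known::Rfc3339;
use time::macros::{format_description, time};
use time::{Date, OffsetDateTime, PrimitiveDateTime};

fn parse_expiration(s: &str) -> Option<OffsetDateTime> {
    OffsetDateTime::parse(s, &Rfc3339)
        // "2999-12-31T00:00:00" without an offset: assume UTC.
        .or_else(|_| {
            PrimitiveDateTime::parse(
                s,
                format_description!("[year]-[month]-[day]T[hour]:[minute]:[second]"),
            )
            .map(|dt| dt.assume_utc())
        })
        // Same thing with a space instead of `T`.
        .or_else(|_| {
            PrimitiveDateTime::parse(
                s,
                format_description!("[year]-[month]-[day] [hour]:[minute]:[second]"),
            )
            .map(|dt| dt.assume_utc())
        })
        // Bare date: the key expires at midnight UTC on that day.
        .or_else(|_| {
            Date::parse(s, format_description!("[year]-[month]-[day]"))
                .map(|d| PrimitiveDateTime::new(d, time!(00:00)).assume_utc())
        })
        .ok()
        // A date in the past is rejected, like in the original helper.
        .filter(|d| *d > OffsetDateTime::now_utc())
}

fn main() {
    assert!(parse_expiration("2999-12-31").is_some());
    assert!(parse_expiration("2000-01-01").is_none()); // already expired
    assert!(parse_expiration("not a date").is_none());
}
```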
- .and_then(|d| { - if d > OffsetDateTime::now_utc() { - Ok(d) - } else { - Err(AuthControllerError::InvalidApiKeyExpiresAt(value.clone())) - } - }) - .map(Option::Some), - Value::Null => Ok(None), - _otherwise => Err(AuthControllerError::InvalidApiKeyExpiresAt(value.clone())), - } -} diff --git a/meilisearch-auth/src/lib.rs b/meilisearch-auth/src/lib.rs index 43183d4cf..020a2821c 100644 --- a/meilisearch-auth/src/lib.rs +++ b/meilisearch-auth/src/lib.rs @@ -1,7 +1,5 @@ -mod action; mod dump; pub mod error; -mod key; mod store; use std::collections::{HashMap, HashSet}; @@ -9,19 +7,16 @@ use std::ops::Deref; use std::path::Path; use std::sync::Arc; +use error::{AuthControllerError, Result}; +use meilisearch_types::keys::{Action, Key}; +use meilisearch_types::star_or::StarOr; use serde::{Deserialize, Serialize}; use serde_json::Value; +pub use store::open_auth_store_env; +use store::{generate_key_as_hexa, HeedAuthStore}; use time::OffsetDateTime; use uuid::Uuid; -pub use action::{actions, Action}; -use error::{AuthControllerError, Result}; -pub use key::Key; -use meilisearch_types::star_or::StarOr; -use store::generate_key_as_hexa; -pub use store::open_auth_store_env; -use store::HeedAuthStore; - #[derive(Clone)] pub struct AuthController { store: Arc, @@ -36,18 +31,13 @@ impl AuthController { generate_default_keys(&store)?; } - Ok(Self { - store: Arc::new(store), - master_key: master_key.clone(), - }) + Ok(Self { store: Arc::new(store), master_key: master_key.clone() }) } pub fn create_key(&self, value: Value) -> Result { let key = Key::create_from_value(value)?; match self.store.get_api_key(key.uid)? { - Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists( - key.uid.to_string(), - )), + Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(key.uid.to_string())), None => self.store.put_api_key(key), } } @@ -66,9 +56,9 @@ impl AuthController { pub fn get_optional_uid_from_encoded_key(&self, encoded_key: &[u8]) -> Result> { match &self.master_key { - Some(master_key) => self - .store - .get_uid_from_encoded_key(encoded_key, master_key.as_bytes()), + Some(master_key) => { + self.store.get_uid_from_encoded_key(encoded_key, master_key.as_bytes()) + } None => Ok(None), } } @@ -134,9 +124,7 @@ impl AuthController { /// Generate a valid key from a key id using the current master key. /// Returns None if no master key has been set. pub fn generate_key(&self, uid: Uuid) -> Option { - self.master_key - .as_ref() - .map(|master_key| generate_key_as_hexa(uid, master_key.as_bytes())) + self.master_key.as_ref().map(|master_key| generate_key_as_hexa(uid, master_key.as_bytes())) } /// Check if the provided key is authorized to make a specific action @@ -154,8 +142,7 @@ impl AuthController { .or(match index { // else check if the key has access to the requested index. Some(index) => { - self.store - .get_expiration_date(uid, action, Some(index.as_bytes()))? + self.store.get_expiration_date(uid, action, Some(index.as_bytes()))? } // or to any index if no index has been requested. None => self.store.prefix_first_expiration_date(uid, action)?, @@ -168,6 +155,17 @@ impl AuthController { None => Ok(false), } } + + /// Delete all the keys in the DB. + pub fn raw_delete_all_keys(&mut self) -> Result<()> { + self.store.delete_all_keys() + } + + /// Delete all the keys in the DB. 
+ pub fn raw_insert_key(&mut self, key: Key) -> Result<()> { + self.store.put_api_key(key)?; + Ok(()) + } } pub struct AuthFilter { @@ -177,10 +175,7 @@ pub struct AuthFilter { impl Default for AuthFilter { fn default() -> Self { - Self { - search_rules: SearchRules::default(), - allow_index_creation: true, - } + Self { search_rules: SearchRules::default(), allow_index_creation: true } } } @@ -215,10 +210,30 @@ impl SearchRules { None } } - Self::Map(map) => map - .get(index) - .or_else(|| map.get("*")) - .map(|isr| isr.clone().unwrap_or_default()), + Self::Map(map) => { + map.get(index).or_else(|| map.get("*")).map(|isr| isr.clone().unwrap_or_default()) + } + } + } + + /// Return the list of indexes such that `self.is_index_authorized(index) == true`, + /// or `None` if all indexes satisfy this condition. + pub fn authorized_indexes(&self) -> Option> { + match self { + SearchRules::Set(set) => { + if set.contains("*") { + None + } else { + Some(set.iter().cloned().collect()) + } + } + SearchRules::Map(map) => { + if map.contains_key("*") { + None + } else { + Some(map.keys().cloned().collect()) + } + } } } } diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index a8b27e06a..b3f9ed672 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -1,19 +1,19 @@ use std::borrow::Cow; use std::cmp::Reverse; use std::collections::HashSet; -use std::convert::TryFrom; -use std::convert::TryInto; +use std::convert::{TryFrom, TryInto}; use std::fs::create_dir_all; use std::ops::Deref; use std::path::Path; use std::str; use std::sync::Arc; -use enum_iterator::IntoEnumIterator; use hmac::{Hmac, Mac}; +use meilisearch_types::keys::KeyId; +use meilisearch_types::milli; +use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; +use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use meilisearch_types::star_or::StarOr; -use milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson}; -use milli::heed::{Database, Env, EnvOpenOptions, RwTxn}; use sha2::Sha256; use time::OffsetDateTime; use uuid::fmt::Hyphenated; @@ -27,8 +27,6 @@ const AUTH_DB_PATH: &str = "auth"; const KEY_DB_NAME: &str = "api-keys"; const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expiration"; -pub type KeyId = Uuid; - #[derive(Clone)] pub struct HeedAuthStore { env: Arc, @@ -60,12 +58,7 @@ impl HeedAuthStore { let keys = env.create_database(Some(KEY_DB_NAME))?; let action_keyid_index_expiration = env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?; - Ok(Self { - env, - keys, - action_keyid_index_expiration, - should_close_on_drop: true, - }) + Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true }) } pub fn set_drop_on_close(&mut self, v: bool) { @@ -92,15 +85,11 @@ impl HeedAuthStore { let mut actions = HashSet::new(); for action in &key.actions { match action { - Action::All => actions.extend(Action::into_enum_iter()), + Action::All => actions.extend(enum_iterator::all::()), Action::DocumentsAll => { actions.extend( - [ - Action::DocumentsGet, - Action::DocumentsDelete, - Action::DocumentsAdd, - ] - .iter(), + [Action::DocumentsGet, Action::DocumentsDelete, Action::DocumentsAdd] + .iter(), ); } Action::IndexesAll => { @@ -110,6 +99,7 @@ impl HeedAuthStore { Action::IndexesDelete, Action::IndexesGet, Action::IndexesUpdate, + Action::IndexesSwap, ] .iter(), ); @@ -121,7 +111,7 @@ impl HeedAuthStore { actions.insert(Action::DumpsCreate); } Action::TasksAll => { - 
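
Editor's note: `authorized_indexes`, added to `SearchRules` just above, returns `None` when a `*` entry makes every index searchable and an explicit list otherwise. The sketch below shows the intended usage, assuming (as the surrounding code suggests) that the `Set` variant is public and wraps a `HashSet<String>`; treat the construction details as an assumption rather than a guaranteed API.

```rust
use std::collections::HashSet;

use meilisearch_auth::SearchRules;

fn main() {
    // A key restricted to two indexes: both show up in the authorized list.
    let rules = SearchRules::Set(HashSet::from(["movies".to_string(), "books".to_string()]));
    let mut indexes = rules.authorized_indexes().expect("restricted rules return Some");
    indexes.sort();
    assert_eq!(indexes, vec!["books".to_string(), "movies".to_string()]);

    // A wildcard entry authorizes every index, so no explicit list is returned.
    let all = SearchRules::Set(HashSet::from(["*".to_string()]));
    assert_eq!(all.authorized_indexes(), None);
}
```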
actions.insert(Action::TasksGet); + actions.extend([Action::TasksGet, Action::TasksDelete, Action::TasksCancel]); } Action::StatsAll => { actions.insert(Action::StatsGet); @@ -198,6 +188,13 @@ impl HeedAuthStore { Ok(existing) } + pub fn delete_all_keys(&self) -> Result<()> { + let mut wtxn = self.env.write_txn()?; + self.keys.clear(&mut wtxn)?; + wtxn.commit()?; + Ok(()) + } + pub fn list_api_keys(&self) -> Result> { let mut list = Vec::new(); let rtxn = self.env.read_txn()?; diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 63f2fb200..6a4cc0336 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -4,93 +4,102 @@ description = "Meilisearch HTTP server" edition = "2021" license = "MIT" name = "meilisearch-http" -version = "0.29.2" +version = "0.30.0" [[bin]] name = "meilisearch" path = "src/main.rs" -[build-dependencies] -anyhow = { version = "1.0.62", optional = true } -cargo_toml = { version = "0.11.4", optional = true } -hex = { version = "0.4.3", optional = true } -reqwest = { version = "0.11.9", features = ["blocking", "rustls-tls"], default-features = false, optional = true } -sha-1 = { version = "0.10.0", optional = true } -static-files = { version = "0.2.3", optional = true } -tempfile = { version = "3.3.0", optional = true } -vergen = { version = "7.0.0", default-features = false, features = ["git"] } -zip = { version = "0.5.13", optional = true } - [dependencies] -actix-cors = "0.6.1" -actix-web = { version = "4.0.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] } +actix-cors = "0.6.3" +actix-http = { version = "3.2.2", default-features = false, features = ["compress-brotli", "compress-gzip", "rustls"] } +actix-web = { version = "4.2.1", default-features = false, features = ["macros", "compress-brotli", "compress-gzip", "cookies", "rustls"] } actix-web-static-files = { git = "https://github.com/kilork/actix-web-static-files.git", rev = "2d3b6160", optional = true } -anyhow = { version = "1.0.62", features = ["backtrace"] } +anyhow = { version = "1.0.65", features = ["backtrace"] } async-stream = "0.3.3" -async-trait = "0.1.52" -bstr = "0.2.17" +async-trait = "0.1.57" +bstr = "1.0.1" byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] } -bytes = "1.1.0" -clap = { version = "3.1.6", features = ["derive", "env"] } -crossbeam-channel = "0.5.2" -either = "1.6.1" -env_logger = "0.9.0" -flate2 = "1.0.22" +bytes = "1.2.1" +clap = { version = "4.0.9", features = ["derive", "env"] } +crossbeam-channel = "0.5.6" +dump = { path = "../dump" } +either = "1.8.0" +env_logger = "0.9.1" +file-store = { path = "../file-store" } +flate2 = "1.0.24" fst = "0.4.7" -futures = "0.3.21" -futures-util = "0.3.21" -http = "0.2.6" -indexmap = { version = "1.8.0", features = ["serde-1"] } -itertools = "0.10.3" -jsonwebtoken = "8.0.1" -log = "0.4.14" +futures = "0.3.24" +futures-util = "0.3.24" +http = "0.2.8" +index-scheduler = { path = "../index-scheduler" } +indexmap = { version = "1.9.1", features = ["serde-1"] } +itertools = "0.10.5" +jsonwebtoken = "8.1.1" +lazy_static = "1.4.0" +log = "0.4.17" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -meilisearch-lib = { path = "../meilisearch-lib" } mimalloc = { version = "0.1.29", default-features = false } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" -once_cell = "1.10.0" -parking_lot = "0.12.0" -pin-project-lite = "0.2.8" +once_cell = "1.15.0" 
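
Editor's note: in `HeedAuthStore::put_api_key`, shown a little earlier in this hunk, wildcard actions are flattened into the concrete actions they imply before being written to the `keyid-action-index-expiration` database, and the bump to `enum_iterator` 1.x changes the "expand `*`" call from `Action::into_enum_iter()` to `enum_iterator::all::<Action>()`. Below is a stand-alone sketch of that expansion step with a toy enum; the real action list lives in `meilisearch-types::keys`.

```rust
// Toy model of the wildcard expansion; requires `enum_iterator = "1"`,
// the same major version this diff moves to.
use std::collections::HashSet;

use enum_iterator::Sequence;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Sequence)]
enum Action {
    All,
    Search,
    TasksAll,
    TasksGet,
    TasksDelete,
    TasksCancel,
}

fn expand(requested: &[Action]) -> HashSet<Action> {
    let mut actions = HashSet::new();
    for action in requested {
        match action {
            // `*` expands to every variant the enum can enumerate.
            Action::All => actions.extend(enum_iterator::all::<Action>()),
            // `tasks.*` expands to the concrete task actions, which now
            // include the new delete and cancel operations.
            Action::TasksAll => {
                actions.extend([Action::TasksGet, Action::TasksDelete, Action::TasksCancel])
            }
            other => {
                actions.insert(*other);
            }
        }
    }
    actions
}

fn main() {
    let expanded = expand(&[Action::TasksAll, Action::Search]);
    assert!(expanded.contains(&Action::TasksCancel));
    assert!(!expanded.contains(&Action::All));
}
```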
+parking_lot = "0.12.1" +permissive-json-pointer = { path = "../permissive-json-pointer" } +pin-project-lite = "0.2.9" platform-dirs = "0.3.0" +prometheus = { version = "0.13.2", features = ["process"], optional = true } rand = "0.8.5" -rayon = "1.5.1" -regex = "1.5.5" -reqwest = { version = "0.11.4", features = ["rustls-tls", "json"], default-features = false } -rustls = "0.20.4" -rustls-pemfile = "0.3.0" -segment = { version = "0.2.0", optional = true } -serde = { version = "1.0.136", features = ["derive"] } -serde-cs = "0.2.3" +rayon = "1.5.3" +regex = "1.6.0" +reqwest = { version = "0.11.12", features = ["rustls-tls", "json"], default-features = false } +rustls = "0.20.6" +rustls-pemfile = "1.0.1" +segment = { version = "0.2.1", optional = true } +serde = { version = "1.0.145", features = ["derive"] } +serde-cs = "0.2.4" serde_json = { version = "1.0.85", features = ["preserve_order"] } -sha2 = "0.10.2" +sha2 = "0.10.6" siphasher = "0.3.10" slice-group-by = "0.3.0" static-files = { version = "0.2.3", optional = true } -sysinfo = "0.23.5" +sysinfo = "0.26.4" tar = "0.4.38" tempfile = "3.3.0" -thiserror = "1.0.30" -time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } -tokio = { version = "1.17.0", features = ["full"] } -tokio-stream = "0.1.8" +thiserror = "1.0.37" +time = { version = "0.3.15", features = ["serde-well-known", "formatting", "parsing", "macros"] } +tokio = { version = "1.21.2", features = ["full"] } +tokio-stream = "0.1.10" +toml = "0.5.9" uuid = { version = "1.1.2", features = ["serde", "v4"] } walkdir = "2.3.2" -prometheus = { version = "0.13.0", features = ["process"], optional = true } -lazy_static = "1.4.0" +yaup = "0.2.0" [dev-dependencies] actix-rt = "2.7.0" -assert-json-diff = "2.0.1" -manifest-dir-macros = "0.1.14" +assert-json-diff = "2.0.2" +brotli = "3.3.4" +manifest-dir-macros = "0.1.16" maplit = "1.0.2" -urlencoding = "2.1.0" -yaup = "0.2.0" +meili-snap = {path = "../meili-snap"} +temp-env = "0.3.1" +urlencoding = "2.1.2" +yaup = "0.2.1" + +[build-dependencies] +anyhow = { version = "1.0.65", optional = true } +cargo_toml = { version = "0.12.4", optional = true } +hex = { version = "0.4.3", optional = true } +reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true } +sha-1 = { version = "0.10.0", optional = true } +static-files = { version = "0.2.3", optional = true } +tempfile = { version = "3.3.0", optional = true } +vergen = { version = "7.4.2", default-features = false, features = ["git"] } +zip = { version = "0.6.2", optional = true } [features] -default = ["analytics", "mini-dashboard"] +default = ["analytics", "meilisearch-types/default", "mini-dashboard"] metrics = ["prometheus"] analytics = ["segment"] mini-dashboard = [ @@ -104,7 +113,11 @@ mini-dashboard = [ "tempfile", "zip", ] +chinese = ["meilisearch-types/chinese"] +hebrew = ["meilisearch-types/hebrew"] +japanese = ["meilisearch-types/japanese"] +thai = ["meilisearch-types/thai"] [package.metadata.mini-dashboard] -assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.2/build.zip" -sha1 = "c69feffc6b590e38a46981a85c47f48905d4082a" +assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.3/build.zip" +sha1 = "fb893012023cc33090c549e0eaf10adff335cf6f" diff --git a/meilisearch-http/build.rs b/meilisearch-http/build.rs index 1822cae00..e2207561b 100644 --- a/meilisearch-http/build.rs +++ b/meilisearch-http/build.rs @@ -72,11 +72,8 @@ mod 
mini_dashboard { resource_dir(&dashboard_dir).build()?; // Write the sha1 for the dashboard back to file. - let mut file = OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(sha1_path)?; + let mut file = + OpenOptions::new().write(true).create(true).truncate(true).open(sha1_path)?; file.write_all(sha1.as_bytes())?; file.flush()?; diff --git a/meilisearch-http/src/analytics/mock_analytics.rs b/meilisearch-http/src/analytics/mock_analytics.rs index 01838f223..ad45a1ac8 100644 --- a/meilisearch-http/src/analytics/mock_analytics.rs +++ b/meilisearch-http/src/analytics/mock_analytics.rs @@ -1,16 +1,21 @@ -use std::{any::Any, sync::Arc}; +use std::any::Any; +use std::sync::Arc; use actix_web::HttpRequest; +use meilisearch_types::InstanceUid; use serde_json::Value; -use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt}; +use super::{find_user_id, Analytics, DocumentDeletionKind}; +use crate::routes::indexes::documents::UpdateDocumentsQuery; +use crate::routes::tasks::TasksFilterQueryRaw; +use crate::Opt; -use super::{find_user_id, Analytics}; - -pub struct MockAnalytics; +pub struct MockAnalytics { + instance_uid: Option, +} #[derive(Default)] -pub struct SearchAggregator {} +pub struct SearchAggregator; #[allow(dead_code)] impl SearchAggregator { @@ -23,13 +28,17 @@ impl SearchAggregator { impl MockAnalytics { #[allow(clippy::new_ret_no_self)] - pub fn new(opt: &Opt) -> (Arc, String) { - let user = find_user_id(&opt.db_path).unwrap_or_default(); - (Arc::new(Self), user) + pub fn new(opt: &Opt) -> Arc { + let instance_uid = find_user_id(&opt.db_path); + Arc::new(Self { instance_uid }) } } impl Analytics for MockAnalytics { + fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> { + self.instance_uid.as_ref() + } + // These methods are noop and should be optimized out fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} fn get_search(&self, _aggregate: super::SearchAggregator) {} @@ -41,6 +50,7 @@ impl Analytics for MockAnalytics { _request: &HttpRequest, ) { } + fn delete_documents(&self, _kind: DocumentDeletionKind, _request: &HttpRequest) {} fn update_documents( &self, _documents_query: &UpdateDocumentsQuery, @@ -48,4 +58,6 @@ impl Analytics for MockAnalytics { _request: &HttpRequest, ) { } + fn get_tasks(&self, _query: &TasksFilterQueryRaw, _request: &HttpRequest) {} + fn health_seen(&self, _request: &HttpRequest) {} } diff --git a/meilisearch-http/src/analytics/mod.rs b/meilisearch-http/src/analytics/mod.rs index b51f306a9..46c4b2090 100644 --- a/meilisearch-http/src/analytics/mod.rs +++ b/meilisearch-http/src/analytics/mod.rs @@ -5,15 +5,17 @@ mod segment_analytics; use std::fs; use std::path::{Path, PathBuf}; +use std::str::FromStr; use actix_web::HttpRequest; +use meilisearch_types::InstanceUid; +pub use mock_analytics::MockAnalytics; use once_cell::sync::Lazy; use platform_dirs::AppDirs; use serde_json::Value; use crate::routes::indexes::documents::UpdateDocumentsQuery; - -pub use mock_analytics::MockAnalytics; +use crate::routes::tasks::TasksFilterQueryRaw; // if we are in debug mode OR the analytics feature is disabled // the `SegmentAnalytics` point to the mock instead of the real analytics @@ -40,24 +42,29 @@ fn config_user_id_path(db_path: &Path) -> Option { db_path .canonicalize() .ok() - .map(|path| { - path.join("instance-uid") - .display() - .to_string() - .replace('/', "-") - }) + .map(|path| path.join("instance-uid").display().to_string().replace('/', "-")) 
.zip(MEILISEARCH_CONFIG_PATH.as_ref()) .map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-'))) } /// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid` -fn find_user_id(db_path: &Path) -> Option { +fn find_user_id(db_path: &Path) -> Option { fs::read_to_string(db_path.join("instance-uid")) .ok() .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok()) + .and_then(|uid| InstanceUid::from_str(&uid).ok()) +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum DocumentDeletionKind { + PerDocumentId, + ClearAll, + PerBatch, } pub trait Analytics: Sync + Send { + fn instance_uid(&self) -> Option<&InstanceUid>; + /// The method used to publish most analytics that do not need to be batched every hours fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>); @@ -74,6 +81,10 @@ pub trait Analytics: Sync + Send { index_creation: bool, request: &HttpRequest, ); + + // this method should be called to aggregate a add documents request + fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest); + // this method should be called to batch a update documents request fn update_documents( &self, @@ -81,4 +92,10 @@ pub trait Analytics: Sync + Send { index_creation: bool, request: &HttpRequest, ); + + // this method should be called to aggregate the get tasks requests. + fn get_tasks(&self, query: &TasksFilterQueryRaw, request: &HttpRequest); + + // this method should be called to aggregate a add documents request + fn health_seen(&self, request: &HttpRequest); } diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index f0dfd0fab..afec4c5cb 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -6,18 +6,16 @@ use std::time::{Duration, Instant}; use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; +use byte_unit::Byte; use http::header::CONTENT_TYPE; +use index_scheduler::IndexScheduler; use meilisearch_auth::SearchRules; -use meilisearch_lib::index::{ - SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, -}; -use meilisearch_lib::index_controller::Stats; -use meilisearch_lib::MeiliSearch; +use meilisearch_types::InstanceUid; use once_cell::sync::Lazy; use regex::Regex; use segment::message::{Identify, Track, User}; use segment::{AutoBatcher, Batcher, HttpClient}; +use serde::Serialize; use serde_json::{json, Value}; use sysinfo::{DiskExt, System, SystemExt}; use time::OffsetDateTime; @@ -25,23 +23,28 @@ use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; use uuid::Uuid; +use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH}; use crate::analytics::Analytics; +use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, SchedulerConfig}; use crate::routes::indexes::documents::UpdateDocumentsQuery; +use crate::routes::tasks::TasksFilterQueryRaw; +use crate::routes::{create_all_stats, Stats}; +use crate::search::{ + SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, +}; use crate::Opt; -use super::{config_user_id_path, MEILISEARCH_CONFIG_PATH}; - const ANALYTICS_HEADER: &str = "X-Meilisearch-Client"; /// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. 
Ignore the errors. -fn write_user_id(db_path: &Path, user_id: &str) { +fn write_user_id(db_path: &Path, user_id: &InstanceUid) { let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes()); - if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH - .as_ref() - .zip(config_user_id_path(db_path)) + if let Some((meilisearch_config_path, user_id_path)) = + MEILISEARCH_CONFIG_PATH.as_ref().zip(config_user_id_path(db_path)) { let _ = fs::create_dir_all(&meilisearch_config_path); - let _ = fs::write(user_id_path, user_id.as_bytes()); + let _ = fs::write(user_id_path, user_id.to_string()); } } @@ -66,35 +69,35 @@ pub enum AnalyticsMsg { AggregateGetSearch(SearchAggregator), AggregatePostSearch(SearchAggregator), AggregateAddDocuments(DocumentsAggregator), + AggregateDeleteDocuments(DocumentsDeletionAggregator), AggregateUpdateDocuments(DocumentsAggregator), + AggregateTasks(TasksAggregator), + AggregateHealth(HealthAggregator), } pub struct SegmentAnalytics { + instance_uid: InstanceUid, sender: Sender, user: User, } impl SegmentAnalytics { - pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> (Arc, String) { - let user_id = super::find_user_id(&opt.db_path); - let first_time_run = user_id.is_none(); - let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string()); - write_user_id(&opt.db_path, &user_id); + pub async fn new(opt: &Opt, index_scheduler: Arc) -> Arc { + let instance_uid = super::find_user_id(&opt.db_path); + let first_time_run = instance_uid.is_none(); + let instance_uid = instance_uid.unwrap_or_else(|| Uuid::new_v4()); + write_user_id(&opt.db_path, &instance_uid); - let client = reqwest::Client::builder() - .connect_timeout(Duration::from_secs(10)) - .build(); + let client = reqwest::Client::builder().connect_timeout(Duration::from_secs(10)).build(); // if reqwest throws an error we won't be able to send analytics if client.is_err() { return super::MockAnalytics::new(opt); } - let client = HttpClient::new( - client.unwrap(), - "https://telemetry.meilisearch.com".to_string(), - ); - let user = User::UserId { user_id }; + let client = + HttpClient::new(client.unwrap(), "https://telemetry.meilisearch.com".to_string()); + let user = User::UserId { user_id: instance_uid.to_string() }; let mut batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string()); // If Meilisearch is Launched for the first time: @@ -103,9 +106,7 @@ impl SegmentAnalytics { if first_time_run { let _ = batcher .push(Track { - user: User::UserId { - user_id: "total_launch".to_string(), - }, + user: User::UserId { user_id: "total_launch".to_string() }, event: "Launched".to_string(), ..Default::default() }) @@ -130,20 +131,24 @@ impl SegmentAnalytics { post_search_aggregator: SearchAggregator::default(), get_search_aggregator: SearchAggregator::default(), add_documents_aggregator: DocumentsAggregator::default(), + delete_documents_aggregator: DocumentsDeletionAggregator::default(), update_documents_aggregator: DocumentsAggregator::default(), + get_tasks_aggregator: TasksAggregator::default(), + health_aggregator: HealthAggregator::default(), }); - tokio::spawn(segment.run(meilisearch.clone())); + tokio::spawn(segment.run(index_scheduler.clone())); - let this = Self { - sender, - user: user.clone(), - }; + let this = Self { instance_uid, sender, user: user.clone() }; - (Arc::new(this), user.to_string()) + Arc::new(this) } } impl super::Analytics for SegmentAnalytics { + fn instance_uid(&self) -> Option<&InstanceUid> { + Some(&self.instance_uid) + } + fn 
publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) { let user_agent = request.map(|req| extract_user_agents(req)); @@ -154,21 +159,15 @@ impl super::Analytics for SegmentAnalytics { properties: send, ..Default::default() }; - let _ = self - .sender - .try_send(AnalyticsMsg::BatchMessage(event.into())); + let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event.into())); } fn get_search(&self, aggregate: SearchAggregator) { - let _ = self - .sender - .try_send(AnalyticsMsg::AggregateGetSearch(aggregate)); + let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate)); } fn post_search(&self, aggregate: SearchAggregator) { - let _ = self - .sender - .try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); + let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate)); } fn add_documents( @@ -178,9 +177,12 @@ impl super::Analytics for SegmentAnalytics { request: &HttpRequest, ) { let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); - let _ = self - .sender - .try_send(AnalyticsMsg::AggregateAddDocuments(aggregate)); + let _ = self.sender.try_send(AnalyticsMsg::AggregateAddDocuments(aggregate)); + } + + fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest) { + let aggregate = DocumentsDeletionAggregator::from_query(kind, request); + let _ = self.sender.try_send(AnalyticsMsg::AggregateDeleteDocuments(aggregate)); } fn update_documents( @@ -190,9 +192,135 @@ impl super::Analytics for SegmentAnalytics { request: &HttpRequest, ) { let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); - let _ = self - .sender - .try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); + let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate)); + } + + fn get_tasks(&self, query: &TasksFilterQueryRaw, request: &HttpRequest) { + let aggregate = TasksAggregator::from_query(query, request); + let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate)); + } + + fn health_seen(&self, request: &HttpRequest) { + let aggregate = HealthAggregator::from_query(request); + let _ = self.sender.try_send(AnalyticsMsg::AggregateHealth(aggregate)); + } +} + +/// This structure represent the `infos` field we send in the analytics. +/// It's quite close to the `Opt` structure except all sensitive informations +/// have been simplified to a boolean. +/// It's send as-is in amplitude thus you should never update a name of the +/// struct without the approval of the PM. +#[derive(Debug, Clone, Serialize)] +struct Infos { + env: String, + db_path: bool, + import_dump: bool, + dumps_dir: bool, + ignore_missing_dump: bool, + ignore_dump_if_db_exists: bool, + import_snapshot: bool, + schedule_snapshot: bool, + snapshot_dir: bool, + snapshot_interval_sec: u64, + ignore_missing_snapshot: bool, + ignore_snapshot_if_db_exists: bool, + http_addr: bool, + max_index_size: Byte, + max_task_db_size: Byte, + http_payload_size_limit: Byte, + disable_auto_batching: bool, + log_level: String, + max_indexing_memory: MaxMemory, + max_indexing_threads: MaxThreads, + with_configuration_file: bool, + ssl_auth_path: bool, + ssl_cert_path: bool, + ssl_key_path: bool, + ssl_ocsp_path: bool, + ssl_require_auth: bool, + ssl_resumption: bool, + ssl_tickets: bool, +} + +impl From for Infos { + fn from(options: Opt) -> Self { + // We wants to decompose this whole struct by hand to be sure we don't forget + // to add analytics when we add a field in the Opt. 
+ // Thus we must not insert `..` at the end. + let Opt { + db_path, + http_addr, + master_key: _, + env, + max_index_size, + max_task_db_size, + http_payload_size_limit, + ssl_cert_path, + ssl_key_path, + ssl_auth_path, + ssl_ocsp_path, + ssl_require_auth, + ssl_resumption, + ssl_tickets, + import_snapshot, + ignore_missing_snapshot, + ignore_snapshot_if_db_exists, + snapshot_dir, + schedule_snapshot, + snapshot_interval_sec, + import_dump, + ignore_missing_dump, + ignore_dump_if_db_exists, + dumps_dir, + log_level, + indexer_options, + scheduler_options, + config_file_path, + #[cfg(all(not(debug_assertions), feature = "analytics"))] + no_analytics: _, + } = options; + + let SchedulerConfig { disable_auto_batching } = scheduler_options; + let IndexerOpts { + log_every_n: _, + max_nb_chunks: _, + max_indexing_memory, + max_indexing_threads, + } = indexer_options; + + // We're going to override every sensible information. + // We consider information sensible if it contains a path, an address, or a key. + Self { + env, + db_path: db_path != PathBuf::from("./data.ms"), + import_dump: import_dump.is_some(), + dumps_dir: dumps_dir != PathBuf::from("dumps/"), + ignore_missing_dump, + ignore_dump_if_db_exists, + import_snapshot: import_snapshot.is_some(), + schedule_snapshot, + snapshot_dir: snapshot_dir != PathBuf::from("snapshots/"), + snapshot_interval_sec, + ignore_missing_snapshot, + ignore_snapshot_if_db_exists, + http_addr: http_addr != default_http_addr(), + max_index_size, + max_task_db_size, + http_payload_size_limit, + disable_auto_batching, + log_level, + max_indexing_memory, + max_indexing_threads, + with_configuration_file: config_file_path.is_some(), + ssl_auth_path: ssl_auth_path.is_some(), + ssl_cert_path: ssl_cert_path.is_some(), + ssl_key_path: ssl_key_path.is_some(), + ssl_ocsp_path: ssl_ocsp_path.is_some(), + ssl_require_auth, + ssl_resumption, + ssl_tickets, + } } } @@ -204,7 +332,10 @@ pub struct Segment { get_search_aggregator: SearchAggregator, post_search_aggregator: SearchAggregator, add_documents_aggregator: DocumentsAggregator, + delete_documents_aggregator: DocumentsDeletionAggregator, update_documents_aggregator: DocumentsAggregator, + get_tasks_aggregator: TasksAggregator, + health_aggregator: HealthAggregator, } impl Segment { @@ -220,42 +351,14 @@ impl Segment { json!({ "distribution": sys.name(), "kernel_version": kernel_version, - "cores": sys.processors().len(), + "cores": sys.cpus().len(), "ram_size": sys.total_memory(), "disk_size": sys.disks().iter().map(|disk| disk.total_space()).max(), "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), }) }); - // The infos are all cli option except every option containing sensitive information. - // We consider an information as sensible if it contains a path, an address or a key. - let infos = { - // First we see if any sensitive fields were used. - let db_path = opt.db_path != PathBuf::from("./data.ms"); - let import_dump = opt.import_dump.is_some(); - let dumps_dir = opt.dumps_dir != PathBuf::from("dumps/"); - let import_snapshot = opt.import_snapshot.is_some(); - let snapshots_dir = opt.snapshot_dir != PathBuf::from("snapshots/"); - let http_addr = opt.http_addr != "127.0.0.1:7700"; - - let mut infos = serde_json::to_value(opt).unwrap(); - - // Then we overwrite all sensitive field with a boolean representing if - // the feature was used or not. 
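
Editor's note: the comment above describes a deliberate compile-time guard. Because `impl From<Opt> for Infos` destructures every field of `Opt` without a trailing `..`, adding a field to `Opt` breaks the build until the analytics struct is updated. Here is a tiny self-contained illustration of that pattern; the structs and field names are made up, not the real `Opt`/`Infos`.

```rust
// Minimal illustration of the "no `..` in the destructuring" guard.
struct Options {
    db_path: std::path::PathBuf,
    master_key: Option<String>,
    // Adding a field here makes the destructuring in `From<Options> for Report`
    // stop compiling until the new field is explicitly handled.
}

struct Report {
    db_path_is_custom: bool,
    has_master_key: bool,
}

impl From<Options> for Report {
    fn from(value: Options) -> Self {
        // No `..` here on purpose: every field must be listed explicitly.
        let Options { db_path, master_key } = value;
        Report {
            db_path_is_custom: db_path != std::path::PathBuf::from("./data.ms"),
            has_master_key: master_key.is_some(),
        }
    }
}

fn main() {
    let report = Report::from(Options { db_path: "./data.ms".into(), master_key: None });
    assert!(!report.db_path_is_custom);
    assert!(!report.has_master_key);
}
```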
- infos["db_path"] = json!(db_path); - infos["import_dump"] = json!(import_dump); - infos["dumps_dir"] = json!(dumps_dir); - infos["import_snapshot"] = json!(import_snapshot); - infos["snapshot_dir"] = json!(snapshots_dir); - infos["http_addr"] = json!(http_addr); - - infos - }; - - let number_of_documents = stats - .indexes - .values() - .map(|index| index.number_of_documents) - .collect::>(); + let number_of_documents = + stats.indexes.values().map(|index| index.number_of_documents).collect::>(); json!({ "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day @@ -265,11 +368,11 @@ impl Segment { "indexes_number": stats.indexes.len(), "documents_number": number_of_documents, }, - "infos": infos, + "infos": Infos::from(opt.clone()), }) } - async fn run(mut self, meilisearch: MeiliSearch) { + async fn run(mut self, index_scheduler: Arc) { const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour // The first batch must be sent after one hour. let mut interval = @@ -278,7 +381,7 @@ impl Segment { loop { select! { _ = interval.tick() => { - self.tick(meilisearch.clone()).await; + self.tick(index_scheduler.clone()).await; }, msg = self.inbox.recv() => { match msg { @@ -286,7 +389,10 @@ impl Segment { Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), + Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg), Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), + Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg), + Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg), None => (), } } @@ -294,8 +400,8 @@ impl Segment { } } - async fn tick(&mut self, meilisearch: MeiliSearch) { - if let Ok(stats) = meilisearch.get_all_stats(&SearchRules::default()).await { + async fn tick(&mut self, index_scheduler: Arc) { + if let Ok(stats) = create_all_stats(index_scheduler.into(), &SearchRules::default()) { let _ = self .batcher .push(Identify { @@ -316,8 +422,14 @@ impl Segment { .into_event(&self.user, "Documents Searched POST"); let add_documents = std::mem::take(&mut self.add_documents_aggregator) .into_event(&self.user, "Documents Added"); + let delete_documents = std::mem::take(&mut self.delete_documents_aggregator) + .into_event(&self.user, "Documents Deleted"); let update_documents = std::mem::take(&mut self.update_documents_aggregator) .into_event(&self.user, "Documents Updated"); + let get_tasks = + std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen"); + let health = + std::mem::take(&mut self.health_aggregator).into_event(&self.user, "Health Seen"); if let Some(get_search) = get_search { let _ = self.batcher.push(get_search).await; @@ -328,9 +440,18 @@ impl Segment { if let Some(add_documents) = add_documents { let _ = self.batcher.push(add_documents).await; } + if let Some(delete_documents) = delete_documents { + let _ = self.batcher.push(delete_documents).await; + } if let Some(update_documents) = update_documents { let _ = self.batcher.push(update_documents).await; } + if let Some(get_tasks) = get_tasks { + let _ = self.batcher.push(get_tasks).await; + } + if let Some(health) = health { + let _ = 
self.batcher.push(health).await; + } let _ = self.batcher.flush().await; } } @@ -349,16 +470,16 @@ pub struct SearchAggregator { // sort sort_with_geo_point: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains + // every time a request has a filter, this field must be incremented by the number of terms it contains sort_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one + // every time a request has a filter, this field must be incremented by one sort_total_number_of_criteria: usize, // filter filter_with_geo_radius: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains + // every time a request has a filter, this field must be incremented by the number of terms it contains filter_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one + // every time a request has a filter, this field must be incremented by one filter_total_number_of_criteria: usize, used_syntax: HashMap, @@ -366,19 +487,27 @@ pub struct SearchAggregator { // The maximum number of terms in a q request max_terms_number: usize, - // everytime a search is done, we increment the counter linked to the used settings + // every time a search is done, we increment the counter linked to the used settings matching_strategy: HashMap, // pagination max_limit: usize, max_offset: usize, + finite_pagination: usize, // formatting + max_attributes_to_retrieve: usize, + max_attributes_to_highlight: usize, highlight_pre_tag: bool, highlight_post_tag: bool, + max_attributes_to_crop: usize, crop_marker: bool, show_matches_position: bool, crop_length: bool, + + // facets + facets_sum_of_terms: usize, + facets_total_number_of_facets: usize, } impl SearchAggregator { @@ -402,11 +531,7 @@ impl SearchAggregator { let syntax = match filter { Value::String(_) => "string".to_string(), Value::Array(values) => { - if values - .iter() - .map(|v| v.to_string()) - .any(|s| RE.is_match(&s)) - { + if values.iter().map(|v| v.to_string()).any(|s| RE.is_match(&s)) { "mixed".to_string() } else { "array".to_string() @@ -426,11 +551,18 @@ impl SearchAggregator { ret.max_terms_number = q.split_whitespace().count(); } - ret.matching_strategy - .insert(format!("{:?}", query.matching_strategy), 1); + if query.is_finite_pagination() { + let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); + ret.max_limit = limit; + ret.max_offset = query.page.unwrap_or(1).saturating_sub(1) * limit; + ret.finite_pagination = 1; + } else { + ret.max_limit = query.limit; + ret.max_offset = query.offset; + ret.finite_pagination = 0; + } - ret.max_limit = query.limit; - ret.max_offset = query.offset.unwrap_or_default(); + ret.matching_strategy.insert(format!("{:?}", query.matching_strategy), 1); ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); @@ -456,23 +588,23 @@ impl SearchAggregator { for user_agent in other.user_agents.into_iter() { self.user_agents.insert(user_agent); } + // request self.total_received = self.total_received.saturating_add(other.total_received); self.total_succeeded = self.total_succeeded.saturating_add(other.total_succeeded); self.time_spent.append(&mut other.time_spent); + // sort self.sort_with_geo_point |= other.sort_with_geo_point; - self.sort_sum_of_criteria_terms = self - .sort_sum_of_criteria_terms - 
.saturating_add(other.sort_sum_of_criteria_terms); - self.sort_total_number_of_criteria = self - .sort_total_number_of_criteria - .saturating_add(other.sort_total_number_of_criteria); + self.sort_sum_of_criteria_terms = + self.sort_sum_of_criteria_terms.saturating_add(other.sort_sum_of_criteria_terms); + self.sort_total_number_of_criteria = + self.sort_total_number_of_criteria.saturating_add(other.sort_total_number_of_criteria); + // filter self.filter_with_geo_radius |= other.filter_with_geo_radius; - self.filter_sum_of_criteria_terms = self - .filter_sum_of_criteria_terms - .saturating_add(other.filter_sum_of_criteria_terms); + self.filter_sum_of_criteria_terms = + self.filter_sum_of_criteria_terms.saturating_add(other.filter_sum_of_criteria_terms); self.filter_total_number_of_criteria = self .filter_total_number_of_criteria .saturating_add(other.filter_total_number_of_criteria); @@ -483,19 +615,34 @@ impl SearchAggregator { // q self.max_terms_number = self.max_terms_number.max(other.max_terms_number); + // pagination + self.max_limit = self.max_limit.max(other.max_limit); + self.max_offset = self.max_offset.max(other.max_offset); + self.finite_pagination += other.finite_pagination; + + // formatting + self.max_attributes_to_retrieve = + self.max_attributes_to_retrieve.max(other.max_attributes_to_retrieve); + self.max_attributes_to_highlight = + self.max_attributes_to_highlight.max(other.max_attributes_to_highlight); + self.highlight_pre_tag |= other.highlight_pre_tag; + self.highlight_post_tag |= other.highlight_post_tag; + self.max_attributes_to_crop = self.max_attributes_to_crop.max(other.max_attributes_to_crop); + self.crop_marker |= other.crop_marker; + self.show_matches_position |= other.show_matches_position; + self.crop_length |= other.crop_length; + + // facets + self.facets_sum_of_terms = + self.facets_sum_of_terms.saturating_add(other.facets_sum_of_terms); + self.facets_total_number_of_facets = + self.facets_total_number_of_facets.saturating_add(other.facets_total_number_of_facets); + + // matching strategy for (key, value) in other.matching_strategy.into_iter() { let matching_strategy = self.matching_strategy.entry(key).or_insert(0); *matching_strategy = matching_strategy.saturating_add(value); } - // pagination - self.max_limit = self.max_limit.max(other.max_limit); - self.max_offset = self.max_offset.max(other.max_offset); - - self.highlight_pre_tag |= other.highlight_pre_tag; - self.highlight_post_tag |= other.highlight_post_tag; - self.crop_marker |= other.crop_marker; - self.show_matches_position |= other.show_matches_position; - self.crop_length |= other.crop_length; } pub fn into_event(self, user: &User, event_name: &str) -> Option { @@ -528,19 +675,28 @@ impl SearchAggregator { }, "q": { "max_terms_number": self.max_terms_number, - "most_used_matching_strategy": self.matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), }, "pagination": { "max_limit": self.max_limit, "max_offset": self.max_offset, + "most_used_navigation": if self.finite_pagination > (self.total_received / 2) { "exhaustive" } else { "estimated" }, }, "formatting": { + "max_attributes_to_retrieve": self.max_attributes_to_retrieve, + "max_attributes_to_highlight": self.max_attributes_to_highlight, "highlight_pre_tag": self.highlight_pre_tag, "highlight_post_tag": self.highlight_post_tag, + "max_attributes_to_crop": self.max_attributes_to_crop, "crop_marker": self.crop_marker, "show_matches_position": self.show_matches_position, "crop_length": 
self.crop_length, }, + "facets": { + "avg_facets_number": format!("{:.2}", self.facets_sum_of_terms as f64 / self.facets_total_number_of_facets as f64), + }, + "matching_strategy": { + "most_used_strategy": self.matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + } }); Some(Track { @@ -636,3 +792,200 @@ impl DocumentsAggregator { } } } + +#[derive(Default, Serialize)] +pub struct DocumentsDeletionAggregator { + #[serde(skip)] + timestamp: Option, + + // context + #[serde(rename = "user-agent")] + user_agents: HashSet, + + total_received: usize, + per_document_id: bool, + clear_all: bool, + per_batch: bool, +} + +impl DocumentsDeletionAggregator { + pub fn from_query(kind: DocumentDeletionKind, request: &HttpRequest) -> Self { + let mut ret = Self::default(); + ret.timestamp = Some(OffsetDateTime::now_utc()); + + ret.user_agents = extract_user_agents(request).into_iter().collect(); + ret.total_received = 1; + match kind { + DocumentDeletionKind::PerDocumentId => ret.per_document_id = true, + DocumentDeletionKind::ClearAll => ret.clear_all = true, + DocumentDeletionKind::PerBatch => ret.per_batch = true, + } + + ret + } + + /// Aggregate one [DocumentsAggregator] into another. + pub fn aggregate(&mut self, other: Self) { + if self.timestamp.is_none() { + self.timestamp = other.timestamp; + } + + // we can't create a union because there is no `into_union` method + for user_agent in other.user_agents { + self.user_agents.insert(user_agent); + } + self.total_received = self.total_received.saturating_add(other.total_received); + self.per_document_id |= other.per_document_id; + self.clear_all |= other.clear_all; + self.per_batch |= other.per_batch; + } + + pub fn into_event(self, user: &User, event_name: &str) -> Option { + // if we had no timestamp it means we never encountered any events and + // thus we don't need to send this event. 
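A minimal standalone sketch of the pagination bookkeeping introduced above in SearchAggregator::from_query: when page or hits_per_page is set, the reported limit is hits_per_page and the offset is (page - 1) * limit, and the finite_pagination counter is bumped. The types, the function name, and the default limit of 20 are assumptions of this sketch, not the actual SearchAggregator code.

    // Simplified stand-in for the finite-pagination accounting shown above.
    // `default_limit` plays the role of DEFAULT_SEARCH_LIMIT (value assumed).
    fn pagination_stats(
        page: Option<usize>,
        hits_per_page: Option<usize>,
        default_limit: usize,
    ) -> (usize, usize, usize) {
        match (page, hits_per_page) {
            // Finite pagination: limit comes from hits_per_page, offset from the page number.
            (page, hits) if page.is_some() || hits.is_some() => {
                let limit = hits.unwrap_or(default_limit);
                let offset = page.unwrap_or(1).saturating_sub(1) * limit;
                (limit, offset, 1) // finite_pagination counter incremented
            }
            // Estimated pagination: the raw limit/offset are reported (defaults shown here).
            _ => (default_limit, 0, 0),
        }
    }

    fn main() {
        // page 3 with 25 hits per page -> limit 25, offset 50, counted as finite
        assert_eq!(pagination_stats(Some(3), Some(25), 20), (25, 50, 1));
        // plain limit/offset search -> counted as estimated
        assert_eq!(pagination_stats(None, None, 20), (20, 0, 0));
        println!("ok");
    }

Once aggregated, into_event reports the navigation as "exhaustive" when more than half of the received requests used finite pagination, and "estimated" otherwise.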
+ let timestamp = self.timestamp?; + + Some(Track { + timestamp: Some(timestamp), + user: user.clone(), + event: event_name.to_string(), + properties: serde_json::to_value(self).ok()?, + ..Default::default() + }) + } +} + +#[derive(Default, Serialize)] +pub struct TasksAggregator { + #[serde(skip)] + timestamp: Option<OffsetDateTime>, + + // context + #[serde(rename = "user-agent")] + user_agents: HashSet<String>, + + filtered_by_uid: bool, + filtered_by_index_uid: bool, + filtered_by_type: bool, + filtered_by_status: bool, + filtered_by_canceled_by: bool, + filtered_by_before_enqueued_at: bool, + filtered_by_after_enqueued_at: bool, + filtered_by_before_started_at: bool, + filtered_by_after_started_at: bool, + filtered_by_before_finished_at: bool, + filtered_by_after_finished_at: bool, + total_received: usize, +} + +impl TasksAggregator { + pub fn from_query(query: &TasksFilterQueryRaw, request: &HttpRequest) -> Self { + Self { + timestamp: Some(OffsetDateTime::now_utc()), + user_agents: extract_user_agents(request).into_iter().collect(), + filtered_by_uid: query.common.uids.is_some(), + filtered_by_index_uid: query.common.index_uids.is_some(), + filtered_by_type: query.common.types.is_some(), + filtered_by_status: query.common.statuses.is_some(), + filtered_by_canceled_by: query.common.canceled_by.is_some(), + filtered_by_before_enqueued_at: query.dates.before_enqueued_at.is_some(), + filtered_by_after_enqueued_at: query.dates.after_enqueued_at.is_some(), + filtered_by_before_started_at: query.dates.before_started_at.is_some(), + filtered_by_after_started_at: query.dates.after_started_at.is_some(), + filtered_by_before_finished_at: query.dates.before_finished_at.is_some(), + filtered_by_after_finished_at: query.dates.after_finished_at.is_some(), + total_received: 1, + } + } + + /// Aggregate one [TasksAggregator] into another. + pub fn aggregate(&mut self, other: Self) { + if self.timestamp.is_none() { + self.timestamp = other.timestamp; + } + + // we can't create a union because there is no `into_union` method + for user_agent in other.user_agents { + self.user_agents.insert(user_agent); + } + + self.filtered_by_uid |= other.filtered_by_uid; + self.filtered_by_index_uid |= other.filtered_by_index_uid; + self.filtered_by_type |= other.filtered_by_type; + self.filtered_by_status |= other.filtered_by_status; + self.filtered_by_canceled_by |= other.filtered_by_canceled_by; + self.filtered_by_before_enqueued_at |= other.filtered_by_before_enqueued_at; + self.filtered_by_after_enqueued_at |= other.filtered_by_after_enqueued_at; + self.filtered_by_before_started_at |= other.filtered_by_before_started_at; + self.filtered_by_after_started_at |= other.filtered_by_after_started_at; + self.filtered_by_before_finished_at |= other.filtered_by_before_finished_at; + self.filtered_by_after_finished_at |= other.filtered_by_after_finished_at; + + self.total_received = self.total_received.saturating_add(other.total_received); + } + + pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> { + // if we had no timestamp it means we never encountered any events and + // thus we don't need to send this event.
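The new aggregators (DocumentsDeletionAggregator, TasksAggregator, HealthAggregator) all merge with the same discipline: keep the earliest timestamp, OR the boolean flags, and saturate the counters; into_event then skips sending anything if no timestamp was ever recorded. A simplified, self-contained sketch of that pattern, using a stand-in struct rather than the real types:

    // Stand-in type; the real aggregators store an OffsetDateTime and many more flags.
    #[derive(Default)]
    struct MiniAggregator {
        timestamp: Option<u64>,
        filtered_by_status: bool,
        total_received: usize,
    }

    impl MiniAggregator {
        fn aggregate(&mut self, other: Self) {
            // Keep the earliest timestamp seen so far.
            if self.timestamp.is_none() {
                self.timestamp = other.timestamp;
            }
            // Flags are OR-ed, counters are added with saturation.
            self.filtered_by_status |= other.filtered_by_status;
            self.total_received = self.total_received.saturating_add(other.total_received);
        }
    }

    fn main() {
        let mut a = MiniAggregator { timestamp: Some(1), filtered_by_status: false, total_received: 1 };
        let b = MiniAggregator { timestamp: Some(2), filtered_by_status: true, total_received: 1 };
        a.aggregate(b);
        assert_eq!((a.timestamp, a.filtered_by_status, a.total_received), (Some(1), true, 2));
        println!("ok");
    }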
+ let timestamp = self.timestamp?; + + Some(Track { + timestamp: Some(timestamp), + user: user.clone(), + event: event_name.to_string(), + properties: serde_json::to_value(self).ok()?, + ..Default::default() + }) + } +} + +#[derive(Default, Serialize)] +pub struct HealthAggregator { + #[serde(skip)] + timestamp: Option, + + // context + #[serde(rename = "user-agent")] + user_agents: HashSet, + + total_received: usize, +} + +impl HealthAggregator { + pub fn from_query(request: &HttpRequest) -> Self { + let mut ret = Self::default(); + ret.timestamp = Some(OffsetDateTime::now_utc()); + + ret.user_agents = extract_user_agents(request).into_iter().collect(); + ret.total_received = 1; + ret + } + + /// Aggregate one [DocumentsAggregator] into another. + pub fn aggregate(&mut self, other: Self) { + if self.timestamp.is_none() { + self.timestamp = other.timestamp; + } + + // we can't create a union because there is no `into_union` method + for user_agent in other.user_agents { + self.user_agents.insert(user_agent); + } + self.total_received = self.total_received.saturating_add(other.total_received); + } + + pub fn into_event(self, user: &User, event_name: &str) -> Option { + // if we had no timestamp it means we never encountered any events and + // thus we don't need to send this event. + let timestamp = self.timestamp?; + + Some(Track { + timestamp: Some(timestamp), + user: user.clone(), + event: event_name.to_string(), + properties: serde_json::to_value(self).ok()?, + ..Default::default() + }) + } +} diff --git a/meilisearch-http/src/error.rs b/meilisearch-http/src/error.rs index 86b7c1964..e3cc396d7 100644 --- a/meilisearch-http/src/error.rs +++ b/meilisearch-http/src/error.rs @@ -1,6 +1,10 @@ use actix_web as aweb; use aweb::error::{JsonPayloadError, QueryPayloadError}; +use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; use meilisearch_types::error::{Code, ErrorCode, ResponseError}; +use meilisearch_types::index_uid::IndexUidFormatError; +use serde_json::Value; +use tokio::task::JoinError; #[derive(Debug, thiserror::Error)] pub enum MeilisearchHttpError { @@ -12,13 +16,57 @@ pub enum MeilisearchHttpError { .1.iter().map(|s| format!("`{}`", s)).collect::>().join(", ") )] InvalidContentType(String, Vec), + #[error("Document `{0}` not found.")] + DocumentNotFound(String), + #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] + InvalidExpression(&'static [&'static str], Value), + #[error("A {0} payload is missing.")] + MissingPayload(PayloadType), + #[error("The provided payload reached the size limit.")] + PayloadTooLarge, + #[error("Two indexes must be given for each swap. 
The list `{:?}` contains {} indexes.", + .0, .0.len() + )] + SwapIndexPayloadWrongLength(Vec), + #[error(transparent)] + IndexUid(#[from] IndexUidFormatError), + #[error(transparent)] + SerdeJson(#[from] serde_json::Error), + #[error(transparent)] + HeedError(#[from] meilisearch_types::heed::Error), + #[error(transparent)] + IndexScheduler(#[from] index_scheduler::Error), + #[error(transparent)] + Milli(#[from] meilisearch_types::milli::Error), + #[error(transparent)] + Payload(#[from] PayloadError), + #[error(transparent)] + FileStore(#[from] file_store::Error), + #[error(transparent)] + DocumentFormat(#[from] DocumentFormatError), + #[error(transparent)] + Join(#[from] JoinError), } impl ErrorCode for MeilisearchHttpError { fn error_code(&self) -> Code { match self { MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType, + MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload, MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType, + MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound, + MeilisearchHttpError::InvalidExpression(_, _) => Code::Filter, + MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge, + MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::BadRequest, + MeilisearchHttpError::IndexUid(e) => e.error_code(), + MeilisearchHttpError::SerdeJson(_) => Code::Internal, + MeilisearchHttpError::HeedError(_) => Code::Internal, + MeilisearchHttpError::IndexScheduler(e) => e.error_code(), + MeilisearchHttpError::Milli(e) => e.error_code(), + MeilisearchHttpError::Payload(e) => e.error_code(), + MeilisearchHttpError::FileStore(_) => Code::Internal, + MeilisearchHttpError::DocumentFormat(e) => e.error_code(), + MeilisearchHttpError::Join(_) => Code::Internal, } } } @@ -29,11 +77,19 @@ impl From for aweb::Error { } } +impl From for MeilisearchHttpError { + fn from(error: aweb::error::PayloadError) -> Self { + MeilisearchHttpError::Payload(PayloadError::Payload(error)) + } +} + #[derive(Debug, thiserror::Error)] pub enum PayloadError { - #[error("{0}")] + #[error(transparent)] + Payload(aweb::error::PayloadError), + #[error(transparent)] Json(JsonPayloadError), - #[error("{0}")] + #[error(transparent)] Query(QueryPayloadError), #[error("The json payload provided is malformed. `{0}`.")] MalformedPayload(serde_json::error::Error), @@ -44,6 +100,15 @@ pub enum PayloadError { impl ErrorCode for PayloadError { fn error_code(&self) -> Code { match self { + PayloadError::Payload(e) => match e { + aweb::error::PayloadError::Incomplete(_) => Code::Internal, + aweb::error::PayloadError::EncodingCorrupted => Code::Internal, + aweb::error::PayloadError::Overflow => Code::PayloadTooLarge, + aweb::error::PayloadError::UnknownLength => Code::Internal, + aweb::error::PayloadError::Http2Payload(_) => Code::Internal, + aweb::error::PayloadError::Io(_) => Code::Internal, + _ => todo!(), + }, PayloadError::Json(err) => match err { JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge, JsonPayloadError::ContentType => Code::UnsupportedMediaType, diff --git a/meilisearch-http/src/extractors/authentication/error.rs b/meilisearch-http/src/extractors/authentication/error.rs index bb78c53d0..7fa0319b8 100644 --- a/meilisearch-http/src/extractors/authentication/error.rs +++ b/meilisearch-http/src/extractors/authentication/error.rs @@ -9,6 +9,8 @@ pub enum AuthenticationError { // Triggered on configuration error. #[error("An internal error has occurred. 
`Irretrievable state`.")] IrretrievableState, + #[error("Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.")] + MissingMasterKey, } impl ErrorCode for AuthenticationError { @@ -17,6 +19,7 @@ impl ErrorCode for AuthenticationError { AuthenticationError::MissingAuthorizationHeader => Code::MissingAuthorizationHeader, AuthenticationError::InvalidToken => Code::InvalidToken, AuthenticationError::IrretrievableState => Code::Internal, + AuthenticationError::MissingMasterKey => Code::MissingMasterKey, } } } diff --git a/meilisearch-http/src/extractors/authentication/mod.rs b/meilisearch-http/src/extractors/authentication/mod.rs index f6feabbbd..8944b60d3 100644 --- a/meilisearch-http/src/extractors/authentication/mod.rs +++ b/meilisearch-http/src/extractors/authentication/mod.rs @@ -31,15 +31,14 @@ impl GuardedData { where P: Policy + 'static, { + let missing_master_key = auth.get_master_key().is_none(); + match Self::authenticate(auth, token, index).await? { Some(filters) => match data { - Some(data) => Ok(Self { - data, - filters, - _marker: PhantomData, - }), + Some(data) => Ok(Self { data, filters, _marker: PhantomData }), None => Err(AuthenticationError::IrretrievableState.into()), }, + None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), None => Err(AuthenticationError::InvalidToken.into()), } } @@ -48,15 +47,14 @@ impl GuardedData { where P: Policy + 'static, { + let missing_master_key = auth.get_master_key().is_none(); + match Self::authenticate(auth, String::new(), None).await? { Some(filters) => match data { - Some(data) => Ok(Self { - data, - filters, - _marker: PhantomData, - }), + Some(data) => Ok(Self { data, filters, _marker: PhantomData }), None => Err(AuthenticationError::IrretrievableState.into()), }, + None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), None => Err(AuthenticationError::MissingAuthorizationHeader.into()), } } @@ -129,14 +127,14 @@ pub trait Policy { pub mod policies { use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation}; + use meilisearch_auth::{AuthController, AuthFilter, SearchRules}; + // reexport actions in policies in order to be used in routes configuration. + pub use meilisearch_types::keys::{actions, Action}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use uuid::Uuid; use crate::extractors::authentication::Policy; - use meilisearch_auth::{Action, AuthController, AuthFilter, SearchRules}; - // reexport actions in policies in order to be used in routes configuration. - pub use meilisearch_auth::actions; fn tenant_token_validation() -> Validation { let mut validation = Validation::default(); @@ -174,10 +172,7 @@ pub mod policies { // authenticate if token is the master key. // master key can only have access to keys routes. // if master key is None only keys routes are inaccessible. 
- if auth - .get_master_key() - .map_or_else(|| !is_keys_action(A), |mk| mk == token) - { + if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) { return Some(AuthFilter::default()); } @@ -235,9 +230,7 @@ pub mod policies { } } - return auth - .get_key_filters(uid, Some(data.claims.search_rules)) - .ok(); + return auth.get_key_filters(uid, Some(data.claims.search_rules)).ok(); } None diff --git a/meilisearch-http/src/extractors/payload.rs b/meilisearch-http/src/extractors/payload.rs index 6cd8df190..0ccebe8f9 100644 --- a/meilisearch-http/src/extractors/payload.rs +++ b/meilisearch-http/src/extractors/payload.rs @@ -1,13 +1,15 @@ use std::pin::Pin; use std::task::{Context, Poll}; -use actix_web::error::PayloadError; +use actix_http::encoding::Decoder as Decompress; use actix_web::{dev, web, FromRequest, HttpRequest}; use futures::future::{ready, Ready}; use futures::Stream; +use crate::error::MeilisearchHttpError; + pub struct Payload { - payload: dev::Payload, + payload: Decompress, limit: usize, } @@ -28,7 +30,7 @@ impl Default for PayloadConfig { } impl FromRequest for Payload { - type Error = PayloadError; + type Error = MeilisearchHttpError; type Future = Ready>; @@ -39,14 +41,14 @@ impl FromRequest for Payload { .map(|c| c.limit) .unwrap_or(PayloadConfig::default().limit); ready(Ok(Payload { - payload: payload.take(), + payload: Decompress::from_headers(payload.take(), req.headers()), limit, })) } } impl Stream for Payload { - type Item = Result; + type Item = Result; #[inline] fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { @@ -57,11 +59,11 @@ impl Stream for Payload { self.limit = new_limit; Poll::Ready(Some(Ok(bytes))) } - None => Poll::Ready(Some(Err(PayloadError::Overflow))), + None => Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge))), }, - x => Poll::Ready(Some(x)), + x => Poll::Ready(Some(x.map_err(MeilisearchHttpError::from))), }, - otherwise => otherwise, + otherwise => otherwise.map(|o| o.map(|o| o.map_err(MeilisearchHttpError::from))), } } } diff --git a/meilisearch-http/src/extractors/sequential_extractor.rs b/meilisearch-http/src/extractors/sequential_extractor.rs index d6cee6083..c04210616 100644 --- a/meilisearch-http/src/extractors/sequential_extractor.rs +++ b/meilisearch-http/src/extractors/sequential_extractor.rs @@ -1,7 +1,10 @@ #![allow(non_snake_case)] -use std::{future::Future, pin::Pin, task::Poll}; +use std::future::Future; +use std::pin::Pin; +use std::task::Poll; -use actix_web::{dev::Payload, FromRequest, Handler, HttpRequest}; +use actix_web::dev::Payload; +use actix_web::{FromRequest, Handler, HttpRequest}; use pin_project_lite::pin_project; /// `SeqHandler` is an actix `Handler` that enforces that extractors errors are returned in the diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 1711fe7ba..6fa6b77d8 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -2,86 +2,350 @@ #[macro_use] pub mod error; pub mod analytics; -pub mod task; #[macro_use] pub mod extractors; pub mod option; pub mod routes; +pub mod search; #[cfg(feature = "metrics")] pub mod metrics; #[cfg(feature = "metrics")] pub mod route_metrics; -use std::sync::{atomic::AtomicBool, Arc}; +use std::fs::File; +use std::io::{BufReader, BufWriter}; +use std::path::Path; +use std::sync::atomic::AtomicBool; +use std::sync::Arc; +use std::thread; use std::time::Duration; -use crate::error::MeilisearchHttpError; +use actix_cors::Cors; +use actix_http::body::MessageBody; +use 
actix_web::dev::{ServiceFactory, ServiceResponse}; use actix_web::error::JsonPayloadError; +use actix_web::web::Data; +use actix_web::{middleware, web, HttpRequest}; use analytics::Analytics; +use anyhow::bail; use error::PayloadError; +use extractors::payload::PayloadConfig; use http::header::CONTENT_TYPE; +use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; +use log::error; +use meilisearch_auth::AuthController; +use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; +use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; +use meilisearch_types::settings::apply_settings_to_builder; +use meilisearch_types::tasks::KindWithContent; +use meilisearch_types::versioning::{check_version_file, create_version_file}; +use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; pub use option::Opt; -use actix_web::{web, HttpRequest}; - -use extractors::payload::PayloadConfig; -use meilisearch_auth::AuthController; -use meilisearch_lib::MeiliSearch; +use crate::error::MeilisearchHttpError; pub static AUTOBATCHING_ENABLED: AtomicBool = AtomicBool::new(false); -pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result { - let mut meilisearch = MeiliSearch::builder(); +/// Check if a db is empty. It does not provide any information on the +/// validity of the data in it. +/// We consider a database as non empty when it's a non empty directory. +fn is_empty_db(db_path: impl AsRef) -> bool { + let db_path = db_path.as_ref(); - // disable autobatching? - AUTOBATCHING_ENABLED.store( - !opt.scheduler_options.disable_auto_batching, - std::sync::atomic::Ordering::Relaxed, - ); - - meilisearch - .set_max_index_size(opt.max_index_size.get_bytes() as usize) - .set_max_task_store_size(opt.max_task_db_size.get_bytes() as usize) - // snapshot - .set_ignore_missing_snapshot(opt.ignore_missing_snapshot) - .set_ignore_snapshot_if_db_exists(opt.ignore_snapshot_if_db_exists) - .set_snapshot_interval(Duration::from_secs(opt.snapshot_interval_sec)) - .set_snapshot_dir(opt.snapshot_dir.clone()) - // dump - .set_ignore_missing_dump(opt.ignore_missing_dump) - .set_ignore_dump_if_db_exists(opt.ignore_dump_if_db_exists) - .set_dump_dst(opt.dumps_dir.clone()); - - if let Some(ref path) = opt.import_snapshot { - meilisearch.set_import_snapshot(path.clone()); + if !db_path.exists() { + true + // if we encounter an error or if the db is a file we consider the db non empty + } else if let Ok(dir) = db_path.read_dir() { + dir.count() == 0 + } else { + true } +} - if let Some(ref path) = opt.import_dump { - meilisearch.set_dump_src(path.clone()); - } +pub fn create_app( + index_scheduler: Data, + auth_controller: AuthController, + opt: Opt, + analytics: Arc, + enable_dashboard: bool, +) -> actix_web::App< + impl ServiceFactory< + actix_web::dev::ServiceRequest, + Config = (), + Response = ServiceResponse, + Error = actix_web::Error, + InitError = (), + >, +> { + let app = actix_web::App::new() + .configure(|s| { + configure_data( + s, + index_scheduler.clone(), + auth_controller.clone(), + &opt, + analytics.clone(), + ) + }) + .configure(routes::configure) + .configure(|s| dashboard(s, enable_dashboard)); + #[cfg(feature = "metrics")] + let app = app.configure(|s| configure_metrics_route(s, opt.enable_metrics_route)); - if opt.schedule_snapshot { - meilisearch.set_schedule_snapshot(); - } - - meilisearch.build( - opt.db_path.clone(), - opt.indexer_options.clone(), - opt.scheduler_options.clone(), + #[cfg(feature = "metrics")] + let app = 
app.wrap(Condition::new(opt.enable_metrics_route, route_metrics::RouteMetrics)); + app.wrap( + Cors::default() + .send_wildcard() + .allow_any_header() + .allow_any_origin() + .allow_any_method() + .max_age(86_400), // 24h ) + .wrap(middleware::Logger::default()) + .wrap(middleware::Compress::default()) + .wrap(middleware::NormalizePath::new(middleware::TrailingSlash::Trim)) +} + +// TODO: TAMO: Finish setting up things +pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc, AuthController)> { + // we don't want to create anything in the data.ms yet, thus we + // wrap our two builders in a closure that'll be executed later. + let auth_controller_builder = || AuthController::new(&opt.db_path, &opt.master_key); + let index_scheduler_builder = || { + IndexScheduler::new(IndexSchedulerOptions { + version_file_path: opt.db_path.join(VERSION_FILE_NAME), + auth_path: opt.db_path.join("auth"), + tasks_path: opt.db_path.join("tasks"), + update_file_path: opt.db_path.join("update_files"), + indexes_path: opt.db_path.join("indexes"), + snapshots_path: opt.snapshot_dir.clone(), + dumps_path: opt.dumps_dir.clone(), + task_db_size: opt.max_task_db_size.get_bytes() as usize, + index_size: opt.max_index_size.get_bytes() as usize, + indexer_config: (&opt.indexer_options).try_into()?, + autobatching_enabled: !opt.scheduler_options.disable_auto_batching, + }) + }; + + enum OnFailure { + RemoveDb, + KeepDb, + } + + let meilisearch_builder = |on_failure: OnFailure| -> anyhow::Result<_> { + // if anything wrong happens we delete the `data.ms` entirely. + match ( + index_scheduler_builder().map_err(anyhow::Error::from), + auth_controller_builder().map_err(anyhow::Error::from), + create_version_file(&opt.db_path).map_err(anyhow::Error::from), + ) { + (Ok(i), Ok(a), Ok(())) => Ok((i, a)), + (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { + if matches!(on_failure, OnFailure::RemoveDb) { + std::fs::remove_dir_all(&opt.db_path)?; + } + Err(e) + } + } + }; + + let empty_db = is_empty_db(&opt.db_path); + let (index_scheduler, auth_controller) = if let Some(ref snapshot_path) = opt.import_snapshot { + let snapshot_path_exists = snapshot_path.exists(); + if empty_db && snapshot_path_exists { + match compression::from_tar_gz(snapshot_path, &opt.db_path) { + Ok(()) => meilisearch_builder(OnFailure::RemoveDb)?, + Err(e) => { + std::fs::remove_dir_all(&opt.db_path)?; + return Err(e); + } + } + } else if !empty_db && !opt.ignore_snapshot_if_db_exists { + bail!( + "database already exists at {:?}, try to delete it or rename it", + opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) + ) + } else if !snapshot_path_exists && !opt.ignore_missing_snapshot { + bail!("snapshot doesn't exist at {}", snapshot_path.display()) + } else { + meilisearch_builder(OnFailure::RemoveDb)? 
+ } + } else if let Some(ref path) = opt.import_dump { + let src_path_exists = path.exists(); + if empty_db && src_path_exists { + let (mut index_scheduler, mut auth_controller) = + meilisearch_builder(OnFailure::RemoveDb)?; + match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) { + Ok(()) => (index_scheduler, auth_controller), + Err(e) => { + std::fs::remove_dir_all(&opt.db_path)?; + return Err(e); + } + } + } else if !empty_db && !opt.ignore_dump_if_db_exists { + bail!( + "database already exists at {:?}, try to delete it or rename it", + opt.db_path.canonicalize().unwrap_or_else(|_| opt.db_path.to_owned()) + ) + } else if !src_path_exists && !opt.ignore_missing_dump { + bail!("dump doesn't exist at {:?}", path) + } else { + let (mut index_scheduler, mut auth_controller) = + meilisearch_builder(OnFailure::RemoveDb)?; + match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) { + Ok(()) => (index_scheduler, auth_controller), + Err(e) => { + std::fs::remove_dir_all(&opt.db_path)?; + return Err(e); + } + } + } + } else { + if !empty_db { + check_version_file(&opt.db_path)?; + } + meilisearch_builder(OnFailure::KeepDb)? + }; + + // We create a loop in a thread that registers snapshotCreation tasks + let index_scheduler = Arc::new(index_scheduler); + if opt.schedule_snapshot { + let snapshot_delay = Duration::from_secs(opt.snapshot_interval_sec); + let index_scheduler = index_scheduler.clone(); + thread::Builder::new() + .name(String::from("register-snapshot-tasks")) + .spawn(move || loop { + thread::sleep(snapshot_delay); + if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) { + error!("Error while registering snapshot: {}", e); + } + }) + .unwrap(); + } + + Ok((index_scheduler, auth_controller)) +} + +fn import_dump( + db_path: &Path, + dump_path: &Path, + index_scheduler: &mut IndexScheduler, + auth: &mut AuthController, +) -> Result<(), anyhow::Error> { + let reader = File::open(dump_path)?; + let mut dump_reader = dump::DumpReader::open(reader)?; + + if let Some(date) = dump_reader.date() { + log::info!( + "Importing a dump of meilisearch `{:?}` from the {}", + dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + date + ); + } else { + log::info!( + "Importing a dump of meilisearch `{:?}`", + dump_reader.version(), // TODO: get the meilisearch version instead of the dump version + ); + } + + let instance_uid = dump_reader.instance_uid()?; + + // 1. Import the instance-uid. + if let Some(ref instance_uid) = instance_uid { + // we don't want to panic if there is an error with the instance-uid. + let _ = std::fs::write(db_path.join("instance-uid"), instance_uid.to_string().as_bytes()); + }; + + // 2. Import the `Key`s. + let mut keys = Vec::new(); + auth.raw_delete_all_keys()?; + for key in dump_reader.keys()? { + let key = key?; + auth.raw_insert_key(key.clone())?; + keys.push(key); + } + + let indexer_config = index_scheduler.indexer_config(); + + // /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might + // try to process tasks while we're trying to import the indexes. + + // 3. Import the indexes. + for index_reader in dump_reader.indexes()? 
{ + let mut index_reader = index_reader?; + let metadata = index_reader.metadata(); + log::info!("Importing index `{}`.", metadata.uid); + let index = index_scheduler.create_raw_index(&metadata.uid)?; + + let mut wtxn = index.write_txn()?; + + let mut builder = milli::update::Settings::new(&mut wtxn, &index, indexer_config); + // 3.1 Import the primary key if there is one. + if let Some(ref primary_key) = metadata.primary_key { + builder.set_primary_key(primary_key.to_string()); + } + + // 3.2 Import the settings. + log::info!("Importing the settings."); + let settings = index_reader.settings()?; + apply_settings_to_builder(&settings, &mut builder); + builder.execute(|indexing_step| log::debug!("update: {:?}", indexing_step), || false)?; + + // 3.3 Import the documents. + // 3.3.1 We need to recreate the grenad+obkv format accepted by the index. + log::info!("Importing the documents."); + let file = tempfile::tempfile()?; + let mut builder = DocumentsBatchBuilder::new(BufWriter::new(file)); + for document in index_reader.documents()? { + builder.append_json_object(&document?)?; + } + + // This flush the content of the batch builder. + let file = builder.into_inner()?.into_inner()?; + + // 3.3.2 We feed it to the milli index. + let reader = BufReader::new(file); + let reader = DocumentsBatchReader::from_reader(reader)?; + + let builder = milli::update::IndexDocuments::new( + &mut wtxn, + &index, + indexer_config, + IndexDocumentsConfig { + update_method: IndexDocumentsMethod::ReplaceDocuments, + ..Default::default() + }, + |indexing_step| log::debug!("update: {:?}", indexing_step), + || false, + )?; + + let (builder, user_result) = builder.add_documents(reader)?; + log::info!("{} documents found.", user_result?); + builder.execute()?; + wtxn.commit()?; + log::info!("All documents successfully imported."); + } + + // 4. Import the tasks. + for ret in dump_reader.tasks()? { + let (task, file) = ret?; + index_scheduler.register_dumped_task(task, file)?; + } + Ok(()) } pub fn configure_data( config: &mut web::ServiceConfig, - data: MeiliSearch, + index_scheduler: Data, auth: AuthController, opt: &Opt, analytics: Arc, ) { let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; config - .app_data(data) + .app_data(index_scheduler) .app_data(auth) .app_data(web::Data::from(analytics)) .app_data( @@ -121,9 +385,7 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) { let generated = generated::generate(); // Generate routes for mini-dashboard assets for (path, resource) in generated.into_iter() { - let Resource { - mime_type, data, .. - } = resource; + let Resource { mime_type, data, .. } = resource; // Redirect index.html to / if path == "index.html" { config.service(web::resource("/").route(web::get().to(move || async move { @@ -153,52 +415,3 @@ pub fn configure_metrics_route(config: &mut web::ServiceConfig, enable_metrics_r ); } } - -#[macro_export] -macro_rules! 
create_app { - ($data:expr, $auth:expr, $enable_frontend:expr, $opt:expr, $analytics:expr) => {{ - use actix_cors::Cors; - use actix_web::dev::Service; - use actix_web::middleware::Condition; - use actix_web::middleware::TrailingSlash; - use actix_web::App; - use actix_web::{middleware, web}; - use meilisearch_http::error::MeilisearchHttpError; - use meilisearch_http::routes; - use meilisearch_http::{configure_data, dashboard}; - #[cfg(feature = "metrics")] - use meilisearch_http::{configure_metrics_route, metrics, route_metrics}; - use meilisearch_types::error::ResponseError; - - let app = App::new() - .configure(|s| configure_data(s, $data.clone(), $auth.clone(), &$opt, $analytics)) - .configure(routes::configure) - .configure(|s| dashboard(s, $enable_frontend)); - - #[cfg(feature = "metrics")] - let app = app.configure(|s| configure_metrics_route(s, $opt.enable_metrics_route)); - - let app = app - .wrap( - Cors::default() - .send_wildcard() - .allow_any_header() - .allow_any_origin() - .allow_any_method() - .max_age(86_400), // 24h - ) - .wrap(middleware::Logger::default()) - .wrap(middleware::Compress::default()) - .wrap(middleware::NormalizePath::new( - middleware::TrailingSlash::Trim, - )); - - #[cfg(feature = "metrics")] - let app = app.wrap(Condition::new( - $opt.enable_metrics_route, - route_metrics::RouteMetrics, - )); - - app - }}; -} diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 9627aeef8..087b65247 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,14 +1,14 @@ use std::env; +use std::path::PathBuf; use std::sync::Arc; use actix_web::http::KeepAlive; +use actix_web::web::Data; use actix_web::HttpServer; -use clap::Parser; +use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; -use meilisearch_http::analytics; use meilisearch_http::analytics::Analytics; -use meilisearch_http::{create_app, setup_meilisearch, Opt}; -use meilisearch_lib::MeiliSearch; +use meilisearch_http::{analytics, create_app, setup_meilisearch, Opt}; #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -29,7 +29,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { #[actix_web::main] async fn main() -> anyhow::Result<()> { - let opt = Opt::parse(); + let (opt, config_read_from) = Opt::try_build()?; setup(&opt)?; @@ -45,66 +45,64 @@ async fn main() -> anyhow::Result<()> { _ => unreachable!(), } - let meilisearch = setup_meilisearch(&opt)?; - - let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?; + let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?; #[cfg(all(not(debug_assertions), feature = "analytics"))] - let (analytics, user) = if !opt.no_analytics { - analytics::SegmentAnalytics::new(&opt, &meilisearch).await + let analytics = if !opt.no_analytics { + analytics::SegmentAnalytics::new(&opt, index_scheduler.clone()).await } else { analytics::MockAnalytics::new(&opt) }; #[cfg(any(debug_assertions, not(feature = "analytics")))] - let (analytics, user) = analytics::MockAnalytics::new(&opt); + let analytics = analytics::MockAnalytics::new(&opt); - print_launch_resume(&opt, &user); + print_launch_resume(&opt, analytics.clone(), config_read_from); - run_http(meilisearch, auth_controller, opt, analytics).await?; + run_http(index_scheduler, auth_controller, opt, analytics).await?; Ok(()) } async fn run_http( - data: MeiliSearch, + index_scheduler: Arc, auth_controller: AuthController, opt: Opt, analytics: Arc, ) -> anyhow::Result<()> { - let _enable_dashboard = &opt.env == 
"development"; + let enable_dashboard = &opt.env == "development"; let opt_clone = opt.clone(); + let index_scheduler = Data::from(index_scheduler); + let http_server = HttpServer::new(move || { - create_app!( - data, - auth_controller, - _enable_dashboard, - opt_clone, - analytics.clone() + create_app( + index_scheduler.clone(), + auth_controller.clone(), + opt.clone(), + analytics.clone(), + enable_dashboard, ) }) // Disable signals allows the server to terminate immediately when a user enter CTRL-C .disable_signals() .keep_alive(KeepAlive::Os); - if let Some(config) = opt.get_ssl_config()? { - http_server - .bind_rustls(opt.http_addr, config)? - .run() - .await?; + if let Some(config) = opt_clone.get_ssl_config()? { + http_server.bind_rustls(opt_clone.http_addr, config)?.run().await?; } else { - http_server.bind(&opt.http_addr)?.run().await?; + http_server.bind(&opt_clone.http_addr)?.run().await?; } Ok(()) } -pub fn print_launch_resume(opt: &Opt, user: &str) { +pub fn print_launch_resume( + opt: &Opt, + analytics: Arc, + config_read_from: Option, +) { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); - let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { - "https" - } else { - "http" - }; + let protocol = + if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" }; let ascii_name = r#" 888b d888 d8b 888 d8b 888 8888b d8888 Y8P 888 Y8P 888 @@ -118,15 +116,18 @@ pub fn print_launch_resume(opt: &Opt, user: &str) { eprintln!("{}", ascii_name); + eprintln!( + "Config file path:\t{:?}", + config_read_from + .map(|config_file_path| config_file_path.display().to_string()) + .unwrap_or_else(|| "none".to_string()) + ); eprintln!("Database path:\t\t{:?}", opt.db_path); eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr); eprintln!("Environment:\t\t{:?}", opt.env); eprintln!("Commit SHA:\t\t{:?}", commit_sha.to_string()); eprintln!("Commit date:\t\t{:?}", commit_date.to_string()); - eprintln!( - "Package version:\t{:?}", - env!("CARGO_PKG_VERSION").to_string() - ); + eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string()); #[cfg(all(not(debug_assertions), feature = "analytics"))] { @@ -144,8 +145,8 @@ Anonymous telemetry:\t\"Enabled\"" } } - if !user.is_empty() { - eprintln!("Instance UID:\t\t\"{}\"", user); + if let Some(instance_uid) = analytics.instance_uid() { + eprintln!("Instance UID:\t\t\"{}\"", instance_uid); } eprintln!(); diff --git a/meilisearch-http/src/metrics.rs b/meilisearch-http/src/metrics.rs index cb4239192..f6fdf756d 100644 --- a/meilisearch-http/src/metrics.rs +++ b/meilisearch-http/src/metrics.rs @@ -1,9 +1,8 @@ use lazy_static::lazy_static; use prometheus::{ opts, register_histogram_vec, register_int_counter_vec, register_int_gauge, - register_int_gauge_vec, + register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, }; -use prometheus::{HistogramVec, IntCounterVec, IntGauge, IntGaugeVec}; const HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: &[f64; 14] = &[ 0.0005, 0.0008, 0.00085, 0.0009, 0.00095, 0.001, 0.00105, 0.0011, 0.00115, 0.0012, 0.0015, @@ -16,19 +15,14 @@ lazy_static! 
{ &["method", "path"] ) .expect("Can't create a metric"); - pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!( - "meilisearch_db_size_bytes", - "Meilisearch Db Size In Bytes" - )) - .expect("Can't create a metric"); + pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge = + register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch Db Size In Bytes")) + .expect("Can't create a metric"); pub static ref MEILISEARCH_INDEX_COUNT: IntGauge = register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count")) .expect("Can't create a metric"); pub static ref MEILISEARCH_INDEX_DOCS_COUNT: IntGaugeVec = register_int_gauge_vec!( - opts!( - "meilisearch_index_docs_count", - "Meilisearch Index Docs Count" - ), + opts!("meilisearch_index_docs_count", "Meilisearch Index Docs Count"), &["index"] ) .expect("Can't create a metric"); diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 6848e693d..82d67d5a0 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -1,154 +1,239 @@ -use std::fs; +use std::convert::TryFrom; +use std::env::VarError; +use std::ffi::OsStr; use std::io::{BufReader, Read}; +use std::num::ParseIntError; +use std::ops::Deref; use std::path::PathBuf; +use std::str::FromStr; use std::sync::Arc; +use std::{env, fmt, fs}; -use byte_unit::Byte; +use byte_unit::{Byte, ByteError}; use clap::Parser; -use meilisearch_lib::options::{IndexerOpts, SchedulerConfig}; -use rustls::{ - server::{ - AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, - ServerSessionMemoryCache, - }, - RootCertStore, +use meilisearch_types::milli::update::IndexerConfig; +use rustls::server::{ + AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache, }; +use rustls::RootCertStore; use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; +use sysinfo::{RefreshKind, System, SystemExt}; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; -#[derive(Debug, Clone, Parser, Serialize)] -#[clap(version)] +const MEILI_DB_PATH: &str = "MEILI_DB_PATH"; +const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR"; +const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY"; +const MEILI_ENV: &str = "MEILI_ENV"; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS"; +const MEILI_MAX_INDEX_SIZE: &str = "MEILI_MAX_INDEX_SIZE"; +const MEILI_MAX_TASK_DB_SIZE: &str = "MEILI_MAX_TASK_DB_SIZE"; +const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT"; +const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH"; +const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH"; +const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH"; +const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH"; +const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH"; +const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION"; +const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS"; +const MEILI_IMPORT_SNAPSHOT: &str = "MEILI_IMPORT_SNAPSHOT"; +const MEILI_IGNORE_MISSING_SNAPSHOT: &str = "MEILI_IGNORE_MISSING_SNAPSHOT"; +const MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS: &str = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS"; +const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR"; +const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT"; +const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC"; +const MEILI_IMPORT_DUMP: &str = "MEILI_IMPORT_DUMP"; +const 
MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; +const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; +const MEILI_DUMPS_DIR: &str = "MEILI_DUMPS_DIR"; +const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; +#[cfg(feature = "metrics")] +const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE"; + +const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml"; +const DEFAULT_DB_PATH: &str = "./data.ms"; +const DEFAULT_HTTP_ADDR: &str = "localhost:7700"; +const DEFAULT_ENV: &str = "development"; +const DEFAULT_MAX_INDEX_SIZE: &str = "100 GiB"; +const DEFAULT_MAX_TASK_DB_SIZE: &str = "100 GiB"; +const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB"; +const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/"; +const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400; +const DEFAULT_DUMPS_DIR: &str = "dumps/"; +const DEFAULT_LOG_LEVEL: &str = "INFO"; + +const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; +const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; +const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; +const DEFAULT_LOG_EVERY_N: usize = 100000; + +#[derive(Debug, Clone, Parser, Deserialize)] +#[clap(version, next_display_order = None)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct Opt { - /// The destination where the database must be created. - #[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")] + /// Designates the location where database files will be created and retrieved. + #[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())] + #[serde(default = "default_db_path")] pub db_path: PathBuf, - /// The address on which the http server will listen. - #[clap(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")] + /// Sets the HTTP address and port Meilisearch will use. + #[clap(long, env = MEILI_HTTP_ADDR, default_value_t = default_http_addr())] + #[serde(default = "default_http_addr")] pub http_addr: String, - /// The master key allowing you to do everything on the server. - #[serde(skip)] - #[clap(long, env = "MEILI_MASTER_KEY")] + /// Sets the instance's master key, automatically protecting all routes except `GET /health`. + #[clap(long, env = MEILI_MASTER_KEY)] pub master_key: Option, - /// This environment variable must be set to `production` if you are running in production. - /// If the server is running in development mode more logs will be displayed, - /// and the master key can be avoided which implies that there is no security on the updates routes. - /// This is useful to debug when integrating the engine with another service. - #[clap(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)] + /// Configures the instance's environment. Value must be either `production` or `development`. + #[clap(long, env = MEILI_ENV, default_value_t = default_env(), value_parser = POSSIBLE_ENV)] + #[serde(default = "default_env")] pub env: String, - /// Do not send analytics to Meili. + /// Deactivates Meilisearch's built-in telemetry when provided. + /// + /// Meilisearch automatically collects data from all instances that do not opt out using this flag. + /// All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted + /// at any time. 
#[cfg(all(not(debug_assertions), feature = "analytics"))] - #[serde(skip)] // we can't send true - #[clap(long, env = "MEILI_NO_ANALYTICS")] + #[serde(default)] // we can't send true + #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, - /// The maximum size, in bytes, of the main lmdb database directory - #[clap(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")] + /// Sets the maximum size of the index. Value must be given in bytes or explicitly stating a base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb'). + #[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())] + #[serde(default = "default_max_index_size")] pub max_index_size: Byte, - /// The maximum size, in bytes, of the update lmdb database directory - #[clap(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")] + /// Sets the maximum size of the task database. Value must be given in bytes or explicitly stating a + /// base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb'). + #[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())] + #[serde(default = "default_max_task_db_size")] pub max_task_db_size: Byte, - /// The maximum size, in bytes, of accepted JSON payloads - #[clap(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")] + /// Sets the maximum size of accepted payloads. Value must be given in bytes or explicitly stating a + /// base unit (for instance: 107374182400, '107.7Gb', or '107374 Mb'). + #[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value_t = default_http_payload_size_limit())] + #[serde(default = "default_http_payload_size_limit")] pub http_payload_size_limit: Byte, - /// Read server certificates from CERTFILE. - /// This should contain PEM-format certificates - /// in the right order (the first certificate should - /// certify KEYFILE, the last should be a root CA). - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))] + /// Sets the server's SSL certificates. + #[clap(long, env = MEILI_SSL_CERT_PATH, value_parser)] pub ssl_cert_path: Option, - /// Read private key from KEYFILE. This should be a RSA - /// private key or PKCS8-encoded private key, in PEM format. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))] + /// Sets the server's SSL key files. + #[clap(long, env = MEILI_SSL_KEY_PATH, value_parser)] pub ssl_key_path: Option, - /// Enable client authentication, and accept certificates - /// signed by those roots provided in CERTFILE. - #[clap(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))] - #[serde(skip)] + /// Enables client authentication in the specified path. + #[clap(long, env = MEILI_SSL_AUTH_PATH, value_parser)] pub ssl_auth_path: Option, - /// Read DER-encoded OCSP response from OCSPFILE and staple to certificate. - /// Optional - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))] + /// Sets the server's OCSP file. *Optional* + /// + /// Reads DER-encoded OCSP response from OCSPFILE and staple to certificate. + #[clap(long, env = MEILI_SSL_OCSP_PATH, value_parser)] pub ssl_ocsp_path: Option, - /// Send a fatal alert if the client does not complete client authentication. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_REQUIRE_AUTH")] + /// Makes SSL authentication mandatory. 
+ #[serde(default)] + #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)] pub ssl_require_auth: bool, - /// SSL support session resumption - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_RESUMPTION")] + /// Activates SSL session resumption. + #[serde(default)] + #[clap(long, env = MEILI_SSL_RESUMPTION)] pub ssl_resumption: bool, - /// SSL support tickets. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_TICKETS")] + /// Activates SSL tickets. + #[serde(default)] + #[clap(long, env = MEILI_SSL_TICKETS)] pub ssl_tickets: bool, - /// Defines the path of the snapshot file to import. - /// This option will, by default, stop the process if a database already exist or if no snapshot exists at - /// the given path. If this option is not specified no snapshot is imported. - #[clap(long)] + /// Launches Meilisearch after importing a previously-generated snapshot at the given filepath. + #[clap(long, env = MEILI_IMPORT_SNAPSHOT)] pub import_snapshot: Option, - /// The engine will ignore a missing snapshot and not return an error in such case. - #[clap(long, requires = "import-snapshot")] + /// Prevents a Meilisearch instance from throwing an error when `--import-snapshot` + /// does not point to a valid snapshot file. + /// + /// This command will throw an error if `--import-snapshot` is not defined. + #[clap( + long, + env = MEILI_IGNORE_MISSING_SNAPSHOT, + requires = "import_snapshot" + )] + #[serde(default)] pub ignore_missing_snapshot: bool, - /// The engine will skip snapshot importation and not return an error in such case. - #[clap(long, requires = "import-snapshot")] + /// Prevents a Meilisearch instance with an existing database from throwing an + /// error when using `--import-snapshot`. Instead, the snapshot will be ignored + /// and Meilisearch will launch using the existing database. + /// + /// This command will throw an error if `--import-snapshot` is not defined. + #[clap( + long, + env = MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS, + requires = "import_snapshot" + )] + #[serde(default)] pub ignore_snapshot_if_db_exists: bool, - /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. - #[clap(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")] + /// Sets the directory where Meilisearch will store snapshots. + #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())] + #[serde(default = "default_snapshot_dir")] pub snapshot_dir: PathBuf, - /// Activate snapshot scheduling. - #[clap(long, env = "MEILI_SCHEDULE_SNAPSHOT")] + /// Activates scheduled snapshots when provided. Snapshots are disabled by default. + #[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)] + #[serde(default)] pub schedule_snapshot: bool, - /// Defines time interval, in seconds, between each snapshot creation. - #[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h + /// Defines the interval between each snapshot. Value must be given in seconds. + #[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value_t = default_snapshot_interval_sec())] + #[serde(default = "default_snapshot_interval_sec")] pub snapshot_interval_sec: u64, - /// Import a dump from the specified path, must be a `.dump` file. - #[clap(long, conflicts_with = "import-snapshot")] + /// Imports the dump file located at the specified path. Path must point to a `.dump` file. + /// If a database already exists, Meilisearch will throw an error and abort launch. 
+ #[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import_snapshot")] pub import_dump: Option, - /// If the dump doesn't exists, load or create the database specified by `db-path` instead. - #[clap(long, requires = "import-dump")] + /// Prevents Meilisearch from throwing an error when `--import-dump` does not point to + /// a valid dump file. Instead, Meilisearch will start normally without importing any dump. + /// + /// This option will trigger an error if `--import-dump` is not defined. + #[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import_dump")] + #[serde(default)] pub ignore_missing_dump: bool, - /// Ignore the dump if a database already exists, and load that database instead. - #[clap(long, requires = "import-dump")] + /// Prevents a Meilisearch instance with an existing database from throwing an error + /// when using `--import-dump`. Instead, the dump will be ignored and Meilisearch will + /// launch using the existing database. + /// + /// This option will trigger an error if `--import-dump` is not defined. + #[clap(long, env = MEILI_IGNORE_DUMP_IF_DB_EXISTS, requires = "import_dump")] + #[serde(default)] pub ignore_dump_if_db_exists: bool, - /// Folder where dumps are created when the dump route is called. - #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] + /// Sets the directory where Meilisearch will create dump files. + #[clap(long, env = MEILI_DUMPS_DIR, default_value_os_t = default_dumps_dir())] + #[serde(default = "default_dumps_dir")] pub dumps_dir: PathBuf, - /// Set the log level - #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] + /// Defines how much detail should be present in Meilisearch's logs. + /// + /// Meilisearch currently supports five log levels, listed in order of increasing verbosity: ERROR, WARN, INFO, DEBUG, TRACE. + #[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())] + #[serde(default = "default_log_level")] pub log_level: String, /// Enables Prometheus metrics and /metrics route. #[cfg(feature = "metrics")] - #[clap(long, env = "MEILI_ENABLE_METRICS_ROUTE")] + #[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)] + #[serde(default)] pub enable_metrics_route: bool, #[serde(flatten)] @@ -158,15 +243,149 @@ pub struct Opt { #[serde(flatten)] #[clap(flatten)] pub scheduler_options: SchedulerConfig, + + /// Set the path to a configuration file that should be used to setup the engine. + /// Format must be TOML. + #[clap(long)] + pub config_file_path: Option, } impl Opt { - /// Wether analytics should be enabled or not. + /// Whether analytics should be enabled or not. #[cfg(all(not(debug_assertions), feature = "analytics"))] pub fn analytics(&self) -> bool { !self.no_analytics } + /// Build a new Opt from config file, env vars and cli args. + pub fn try_build() -> anyhow::Result<(Self, Option)> { + // Parse the args to get the config_file_path. + let mut opts = Opt::parse(); + let mut config_read_from = None; + let user_specified_config_file_path = opts + .config_file_path + .clone() + .or_else(|| env::var("MEILI_CONFIG_FILE_PATH").map(PathBuf::from).ok()); + let config_file_path = user_specified_config_file_path + .clone() + .unwrap_or_else(|| PathBuf::from(DEFAULT_CONFIG_FILE_PATH)); + + match std::fs::read(&config_file_path) { + Ok(config) => { + // If the file is successfully read, we deserialize it with `toml`. 
+ let opt_from_config = toml::from_slice::(&config)?; + // Return an error if config file contains 'config_file_path' + // Using that key in the config file doesn't make sense bc it creates a logical loop (config file referencing itself) + if opt_from_config.config_file_path.is_some() { + anyhow::bail!("`config_file_path` is not supported in the configuration file") + } + // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. + opt_from_config.export_to_env(); + // Once injected we parse the cli args once again to take the new env vars into scope. + opts = Opt::parse(); + config_read_from = Some(config_file_path); + } + Err(e) => { + if let Some(path) = user_specified_config_file_path { + // If we have an error while reading the file defined by the user. + anyhow::bail!( + "unable to open or read the {:?} configuration file: {}.", + path, + e, + ) + } + } + } + + Ok((opts, config_read_from)) + } + + /// Exports the opts values to their corresponding env vars if they are not set. + fn export_to_env(self) { + let Opt { + db_path, + http_addr, + master_key, + env, + max_index_size, + max_task_db_size, + http_payload_size_limit, + ssl_cert_path, + ssl_key_path, + ssl_auth_path, + ssl_ocsp_path, + ssl_require_auth, + ssl_resumption, + ssl_tickets, + snapshot_dir, + schedule_snapshot, + snapshot_interval_sec, + dumps_dir, + log_level, + indexer_options, + scheduler_options, + import_snapshot: _, + ignore_missing_snapshot: _, + ignore_snapshot_if_db_exists: _, + import_dump: _, + ignore_missing_dump: _, + ignore_dump_if_db_exists: _, + config_file_path: _, + #[cfg(all(not(debug_assertions), feature = "analytics"))] + no_analytics, + #[cfg(feature = "metrics")] + enable_metrics_route, + } = self; + export_to_env_if_not_present(MEILI_DB_PATH, db_path); + export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); + if let Some(master_key) = master_key { + export_to_env_if_not_present(MEILI_MASTER_KEY, master_key); + } + export_to_env_if_not_present(MEILI_ENV, env); + #[cfg(all(not(debug_assertions), feature = "analytics"))] + { + export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); + } + export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, max_index_size.to_string()); + export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string()); + export_to_env_if_not_present( + MEILI_HTTP_PAYLOAD_SIZE_LIMIT, + http_payload_size_limit.to_string(), + ); + if let Some(ssl_cert_path) = ssl_cert_path { + export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path); + } + if let Some(ssl_key_path) = ssl_key_path { + export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path); + } + if let Some(ssl_auth_path) = ssl_auth_path { + export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path); + } + if let Some(ssl_ocsp_path) = ssl_ocsp_path { + export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path); + } + export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string()); + export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string()); + export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string()); + export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir); + export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string()); + export_to_env_if_not_present( + MEILI_SNAPSHOT_INTERVAL_SEC, + snapshot_interval_sec.to_string(), + ); + export_to_env_if_not_present(MEILI_DUMPS_DIR, dumps_dir); + 
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level); + #[cfg(feature = "metrics")] + { + export_to_env_if_not_present( + MEILI_ENABLE_METRICS_ROUTE, + enable_metrics_route.to_string(), + ); + } + indexer_options.export_to_env(); + scheduler_options.export_to_env(); + } + pub fn get_ssl_config(&self) -> anyhow::Result> { if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) { let config = rustls::ServerConfig::builder().with_safe_defaults(); @@ -214,6 +433,185 @@ impl Opt { } } +#[derive(Debug, Clone, Parser, Deserialize)] +pub struct IndexerOpts { + /// Sets the amount of documents to skip before printing + /// a log regarding the indexing advancement. + #[serde(default = "default_log_every_n")] + #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k + pub log_every_n: usize, + + /// Grenad max number of chunks in bytes. + #[clap(long, hide = true)] + pub max_nb_chunks: Option, + + /// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch + /// uses no more than two thirds of available memory. + #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] + #[serde(default)] + pub max_indexing_memory: MaxMemory, + + /// Sets the maximum number of threads Meilisearch can use during indexation. By default, the + /// indexer avoids using more than half of a machine's total processing units. This ensures + /// Meilisearch is always ready to perform searches, even while you are updating an index. + #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)] + #[serde(default)] + pub max_indexing_threads: MaxThreads, +} + +impl IndexerOpts { + /// Exports the values to their corresponding env vars if they are not set. + pub fn export_to_env(self) { + let IndexerOpts { + max_indexing_memory, + max_indexing_threads, + log_every_n: _, + max_nb_chunks: _, + } = self; + if let Some(max_indexing_memory) = max_indexing_memory.0 { + export_to_env_if_not_present( + MEILI_MAX_INDEXING_MEMORY, + max_indexing_memory.to_string(), + ); + } + export_to_env_if_not_present( + MEILI_MAX_INDEXING_THREADS, + max_indexing_threads.0.to_string(), + ); + } +} + +#[derive(Debug, Clone, Parser, Default, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] +pub struct SchedulerConfig { + /// Deactivates auto-batching when provided. + #[clap(long, env = DISABLE_AUTO_BATCHING)] + #[serde(default)] + pub disable_auto_batching: bool, +} + +impl SchedulerConfig { + pub fn export_to_env(self) { + let SchedulerConfig { disable_auto_batching } = self; + export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string()); + } +} + +impl TryFrom<&IndexerOpts> for IndexerConfig { + type Error = anyhow::Error; + + fn try_from(other: &IndexerOpts) -> Result { + let thread_pool = rayon::ThreadPoolBuilder::new() + .thread_name(|index| format!("indexing-thread:{index}")) + .num_threads(*other.max_indexing_threads) + .build()?; + + Ok(Self { + log_every_n: Some(other.log_every_n), + max_nb_chunks: other.max_nb_chunks, + max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize), + thread_pool: Some(thread_pool), + max_positions_per_attributes: None, + ..Default::default() + }) + } +} + +impl Default for IndexerOpts { + fn default() -> Self { + Self { + log_every_n: 100_000, + max_nb_chunks: None, + max_indexing_memory: MaxMemory::default(), + max_indexing_threads: MaxThreads::default(), + } + } +} + +/// A type used to detect the max memory available and use 2/3 of it. 
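+/// For instance, on a machine reporting 12 GiB of RAM the default budget works out to
+/// roughly 8 GiB; it can be overridden explicitly with `--max-indexing-memory` or the
+/// `MEILI_MAX_INDEXING_MEMORY` environment variable.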
+#[derive(Debug, Clone, Copy, Deserialize, Serialize)] +pub struct MaxMemory(Option); + +impl FromStr for MaxMemory { + type Err = ByteError; + + fn from_str(s: &str) -> Result { + Byte::from_str(s).map(Some).map(MaxMemory) + } +} + +impl Default for MaxMemory { + fn default() -> MaxMemory { + MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_bytes)) + } +} + +impl fmt::Display for MaxMemory { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.0 { + Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), + None => f.write_str("unknown"), + } + } +} + +impl Deref for MaxMemory { + type Target = Option; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl MaxMemory { + pub fn unlimited() -> Self { + Self(None) + } +} + +/// Returns the total amount of bytes available or `None` if this system isn't supported. +fn total_memory_bytes() -> Option { + if System::IS_SUPPORTED { + let memory_kind = RefreshKind::new().with_memory(); + let mut system = System::new_with_specifics(memory_kind); + system.refresh_memory(); + Some(system.total_memory()) + } else { + None + } +} + +#[derive(Debug, Clone, Copy, Deserialize, Serialize)] +pub struct MaxThreads(usize); + +impl FromStr for MaxThreads { + type Err = ParseIntError; + + fn from_str(s: &str) -> Result { + usize::from_str(s).map(Self) + } +} + +impl Default for MaxThreads { + fn default() -> Self { + MaxThreads(num_cpus::get() / 2) + } +} + +impl fmt::Display for MaxThreads { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl Deref for MaxThreads { + type Target = usize; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + fn load_certs(filename: PathBuf) -> anyhow::Result> { let certfile = fs::File::open(filename).map_err(|_| anyhow::anyhow!("cannot open certificate file"))?; @@ -265,12 +663,99 @@ fn load_ocsp(filename: &Option) -> anyhow::Result> { Ok(ret) } +/// Checks if the key is defined in the environment variables. +/// If not, inserts it with the given value. +pub fn export_to_env_if_not_present(key: &str, value: T) +where + T: AsRef, +{ + if let Err(VarError::NotPresent) = std::env::var(key) { + std::env::set_var(key, value); + } +} + +/// Functions used to get default value for `Opt` fields, needs to be function because of serde's default attribute. 
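+///
+/// For example, `#[serde(default = "default_dumps_dir")]` expects the path of a function
+/// returning the default value, which is why plain constants cannot be used here.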
+ +fn default_db_path() -> PathBuf { + PathBuf::from(DEFAULT_DB_PATH) +} + +pub fn default_http_addr() -> String { + DEFAULT_HTTP_ADDR.to_string() +} + +fn default_env() -> String { + DEFAULT_ENV.to_string() +} + +fn default_max_index_size() -> Byte { + Byte::from_str(DEFAULT_MAX_INDEX_SIZE).unwrap() +} + +fn default_max_task_db_size() -> Byte { + Byte::from_str(DEFAULT_MAX_TASK_DB_SIZE).unwrap() +} + +fn default_http_payload_size_limit() -> Byte { + Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap() +} + +fn default_snapshot_dir() -> PathBuf { + PathBuf::from(DEFAULT_SNAPSHOT_DIR) +} + +fn default_snapshot_interval_sec() -> u64 { + DEFAULT_SNAPSHOT_INTERVAL_SEC +} + +fn default_dumps_dir() -> PathBuf { + PathBuf::from(DEFAULT_DUMPS_DIR) +} + +fn default_log_level() -> String { + DEFAULT_LOG_LEVEL.to_string() +} + +fn default_log_every_n() -> usize { + DEFAULT_LOG_EVERY_N +} + #[cfg(test)] mod test { + use super::*; #[test] fn test_valid_opt() { assert!(Opt::try_parse_from(Some("")).is_ok()); } + + #[test] + #[ignore] + fn test_meilli_config_file_path_valid() { + temp_env::with_vars( + vec![("MEILI_CONFIG_FILE_PATH", Some("../config.toml"))], // Relative path in meilisearch_http package + || { + assert!(Opt::try_build().is_ok()); + }, + ); + } + + #[test] + #[ignore] + fn test_meilli_config_file_path_invalid() { + temp_env::with_vars(vec![("MEILI_CONFIG_FILE_PATH", Some("../configgg.toml"))], || { + let possible_error_messages = [ + "unable to open or read the \"../configgg.toml\" configuration file: No such file or directory (os error 2).", + "unable to open or read the \"../configgg.toml\" configuration file: The system cannot find the file specified. (os error 2).", // Windows + ]; + let error_message = Opt::try_build().unwrap_err().to_string(); + assert!( + possible_error_messages.contains(&error_message.as_str()), + "Expected onf of {:?}, got {:?}.", + possible_error_messages, + error_message + ); + }); + } } diff --git a/meilisearch-http/src/route_metrics.rs b/meilisearch-http/src/route_metrics.rs index b2b5f4abc..c1d35cf8d 100644 --- a/meilisearch-http/src/route_metrics.rs +++ b/meilisearch-http/src/route_metrics.rs @@ -1,17 +1,13 @@ use std::future::{ready, Ready}; +use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform}; use actix_web::http::header; -use actix_web::HttpResponse; -use actix_web::{ - dev::{self, Service, ServiceRequest, ServiceResponse, Transform}, - Error, -}; +use actix_web::{Error, HttpResponse}; use futures_util::future::LocalBoxFuture; use meilisearch_auth::actions; use meilisearch_lib::MeiliSearch; use meilisearch_types::error::ResponseError; -use prometheus::HistogramTimer; -use prometheus::{Encoder, TextEncoder}; +use prometheus::{Encoder, HistogramTimer, TextEncoder}; use crate::extractors::authentication::policies::ActionPolicy; use crate::extractors::authentication::GuardedData; @@ -33,15 +29,11 @@ pub async fn get_metrics( let encoder = TextEncoder::new(); let mut buffer = vec![]; - encoder - .encode(&prometheus::gather(), &mut buffer) - .expect("Failed to encode metrics"); + encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics"); let response = String::from_utf8(buffer).expect("Failed to convert bytes to string"); - Ok(HttpResponse::Ok() - .insert_header(header::ContentType(mime::TEXT_PLAIN)) - .body(response)) + Ok(HttpResponse::Ok().insert_header(header::ContentType(mime::TEXT_PLAIN)).body(response)) } pub struct RouteMetrics; diff --git a/meilisearch-http/src/routes/api_key.rs 
b/meilisearch-http/src/routes/api_key.rs index 7605fa644..b53fd3895 100644 --- a/meilisearch-http/src/routes/api_key.rs +++ b/meilisearch-http/src/routes/api_key.rs @@ -1,18 +1,18 @@ use std::str; use actix_web::{web, HttpRequest, HttpResponse}; +use meilisearch_auth::error::AuthControllerError; +use meilisearch_auth::AuthController; +use meilisearch_types::error::{Code, ResponseError}; +use meilisearch_types::keys::{Action, Key}; use serde::{Deserialize, Serialize}; use serde_json::Value; use time::OffsetDateTime; use uuid::Uuid; -use meilisearch_auth::{error::AuthControllerError, Action, AuthController, Key}; -use meilisearch_types::error::{Code, ResponseError}; - -use crate::extractors::{ - authentication::{policies::*, GuardedData}, - sequential_extractor::SeqHandler, -}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::extractors::sequential_extractor::SeqHandler; use crate::routes::Pagination; pub fn configure(cfg: &mut web::ServiceConfig) { @@ -51,10 +51,8 @@ pub async fn list_api_keys( ) -> Result { let page_view = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> { let keys = auth_controller.list_keys()?; - let page_view = paginate.auto_paginate_sized( - keys.into_iter() - .map(|k| KeyView::from_key(k, &auth_controller)), - ); + let page_view = paginate + .auto_paginate_sized(keys.into_iter().map(|k| KeyView::from_key(k, &auth_controller))); Ok(page_view) }) diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index 4d9106ee0..8e0e63776 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -1,27 +1,37 @@ +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; +use index_scheduler::IndexScheduler; use log::debug; -use meilisearch_lib::MeiliSearch; +use meilisearch_auth::AuthController; use meilisearch_types::error::ResponseError; +use meilisearch_types::tasks::KindWithContent; use serde_json::json; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::task::SummarizedTaskView; +use crate::routes::SummarizedTaskView; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump)))); } pub async fn create_dump( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, + auth_controller: GuardedData, AuthController>, req: HttpRequest, analytics: web::Data, ) -> Result { analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); - let res: SummarizedTaskView = meilisearch.register_dump_task().await?.into(); + let task = KindWithContent::DumpCreation { + keys: auth_controller.list_keys()?, + instance_uid: analytics.instance_uid().cloned(), + }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); - debug!("returns: {:?}", res); - Ok(HttpResponse::Accepted().json(res)) + debug!("returns: {:?}", task); + Ok(HttpResponse::Accepted().json(task)) } diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index 3e3db86b2..0fe3cf102 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -1,50 +1,38 @@ -use 
actix_web::error::PayloadError; +use std::io::{Cursor, ErrorKind}; + use actix_web::http::header::CONTENT_TYPE; -use actix_web::web::Bytes; -use actix_web::HttpMessage; -use actix_web::{web, HttpRequest, HttpResponse}; +use actix_web::web::Data; +use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; use bstr::ByteSlice; -use futures::{Stream, StreamExt}; +use futures::StreamExt; +use index_scheduler::IndexScheduler; use log::debug; -use meilisearch_lib::index_controller::{DocumentAdditionFormat, Update}; -use meilisearch_lib::milli::update::IndexDocumentsMethod; -use meilisearch_lib::MeiliSearch; +use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; use meilisearch_types::error::ResponseError; +use meilisearch_types::heed::RoTxn; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::star_or::StarOr; +use meilisearch_types::tasks::KindWithContent; +use meilisearch_types::{milli, Document, Index}; use mime::Mime; use once_cell::sync::Lazy; use serde::Deserialize; use serde_cs::vec::CS; use serde_json::Value; -use tokio::sync::mpsc; -use crate::analytics::Analytics; +use crate::analytics::{Analytics, DocumentDeletionKind}; use crate::error::MeilisearchHttpError; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; use crate::extractors::payload::Payload; use crate::extractors::sequential_extractor::SeqHandler; -use crate::routes::{fold_star_or, PaginationView}; -use crate::task::SummarizedTaskView; +use crate::routes::{fold_star_or, PaginationView, SummarizedTaskView}; static ACCEPTED_CONTENT_TYPE: Lazy> = Lazy::new(|| { - vec![ - "application/json".to_string(), - "application/x-ndjson".to_string(), - "text/csv".to_string(), - ] + vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()] }); -/// This is required because Payload is not Sync nor Send -fn payload_to_stream(mut payload: Payload) -> impl Stream> { - let (snd, recv) = mpsc::channel(1); - tokio::task::spawn_local(async move { - while let Some(data) = payload.next().await { - let _ = snd.send(data).await; - } - }); - tokio_stream::wrappers::ReceiverStream::new(recv) -} - /// Extracts the mime type from the content type and return /// a meilisearch error if anything bad happen. 
fn extract_mime_type(req: &HttpRequest) -> Result, MeilisearchHttpError> { @@ -56,9 +44,7 @@ fn extract_mime_type(req: &HttpRequest) -> Result, MeilisearchHttpE content_type.as_bytes().as_bstr().to_string(), ACCEPTED_CONTENT_TYPE.clone(), )), - None => Err(MeilisearchHttpError::MissingContentType( - ACCEPTED_CONTENT_TYPE.clone(), - )), + None => Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone())), }, } } @@ -93,32 +79,31 @@ pub struct GetDocument { } pub async fn get_document( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, params: web::Query, ) -> Result { - let index = path.index_uid.clone(); - let id = path.document_id.clone(); let GetDocument { fields } = params.into_inner(); let attributes_to_retrieve = fields.and_then(fold_star_or); - let document = meilisearch - .document(index, id, attributes_to_retrieve) - .await?; + let index = index_scheduler.index(&path.index_uid)?; + let document = retrieve_document(&index, &path.document_id, attributes_to_retrieve)?; debug!("returns: {:?}", document); Ok(HttpResponse::Ok().json(document)) } pub async fn delete_document( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, + req: HttpRequest, + analytics: web::Data, ) -> Result { - let DocumentParam { - document_id, - index_uid, - } = path.into_inner(); - let update = Update::DeleteDocuments(vec![document_id]); - let task: SummarizedTaskView = meilisearch.register_update(index_uid, update).await?.into(); + analytics.delete_documents(DocumentDeletionKind::PerDocumentId, &req); + + let DocumentParam { document_id, index_uid } = path.into_inner(); + let task = KindWithContent::DocumentDeletion { index_uid, documents_ids: vec![document_id] }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } @@ -134,21 +119,16 @@ pub struct BrowseQuery { } pub async fn get_all_documents( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, params: web::Query, ) -> Result { debug!("called with params: {:?}", params); - let BrowseQuery { - limit, - offset, - fields, - } = params.into_inner(); + let BrowseQuery { limit, offset, fields } = params.into_inner(); let attributes_to_retrieve = fields.and_then(fold_star_or); - let (total, documents) = meilisearch - .documents(path.into_inner(), offset, limit, attributes_to_retrieve) - .await?; + let index = index_scheduler.index(&index_uid)?; + let (total, documents) = retrieve_documents(&index, offset, limit, attributes_to_retrieve)?; let ret = PaginationView::new(offset, limit, total as usize, documents); @@ -163,8 +143,8 @@ pub struct UpdateDocumentsQuery { } pub async fn add_documents( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, params: web::Query, body: Payload, req: HttpRequest, @@ -172,19 +152,14 @@ pub async fn add_documents( ) -> Result { debug!("called with params: {:?}", params); let params = params.into_inner(); - let index_uid = path.into_inner(); - analytics.add_documents( - ¶ms, - meilisearch.get_index(index_uid.clone()).await.is_err(), - &req, - ); + analytics.add_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); - let allow_index_creation = meilisearch.filters().allow_index_creation; + let allow_index_creation = 
index_scheduler.filters().allow_index_creation; let task = document_addition( extract_mime_type(&req)?, - meilisearch, - index_uid, + index_scheduler, + index_uid.into_inner(), params.primary_key, body, IndexDocumentsMethod::ReplaceDocuments, @@ -196,7 +171,7 @@ pub async fn add_documents( } pub async fn update_documents( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, params: web::Query, body: Payload, @@ -206,16 +181,12 @@ pub async fn update_documents( debug!("called with params: {:?}", params); let index_uid = path.into_inner(); - analytics.update_documents( - ¶ms, - meilisearch.get_index(index_uid.clone()).await.is_err(), - &req, - ); + analytics.update_documents(¶ms, index_scheduler.index(&index_uid).is_err(), &req); - let allow_index_creation = meilisearch.filters().allow_index_creation; + let allow_index_creation = index_scheduler.filters().allow_index_creation; let task = document_addition( extract_mime_type(&req)?, - meilisearch, + index_scheduler, index_uid, params.into_inner().primary_key, body, @@ -229,83 +200,211 @@ pub async fn update_documents( async fn document_addition( mime_type: Option, - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, index_uid: String, primary_key: Option, - body: Payload, + mut body: Payload, method: IndexDocumentsMethod, allow_index_creation: bool, -) -> Result { - let format = match mime_type - .as_ref() - .map(|m| (m.type_().as_str(), m.subtype().as_str())) - { - Some(("application", "json")) => DocumentAdditionFormat::Json, - Some(("application", "x-ndjson")) => DocumentAdditionFormat::Ndjson, - Some(("text", "csv")) => DocumentAdditionFormat::Csv, +) -> Result { + let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) { + Some(("application", "json")) => PayloadType::Json, + Some(("application", "x-ndjson")) => PayloadType::Ndjson, + Some(("text", "csv")) => PayloadType::Csv, Some((type_, subtype)) => { return Err(MeilisearchHttpError::InvalidContentType( format!("{}/{}", type_, subtype), ACCEPTED_CONTENT_TYPE.clone(), - ) - .into()) + )) } None => { - return Err( - MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()).into(), - ) + return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone())) } }; - let update = Update::DocumentAddition { - payload: Box::new(payload_to_stream(body)), - primary_key, - method, - format, - allow_index_creation, + // is your indexUid valid? + let index_uid = IndexUid::try_from(index_uid)?.into_inner(); + + let (uuid, mut update_file) = index_scheduler.create_update_file()?; + + // TODO: this can be slow, maybe we should spawn a thread? But the payload isn't Send+Sync :weary: + // push the entire stream into a `Vec`. + // If someone sends us a never ending stream we're going to block the thread. + // TODO: Maybe we should write it to a file to reduce the RAM consumption + // and then reread it to convert it to obkv? 
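+    // The whole payload is first collected into an in-memory buffer; a blocking task then
+    // parses it (JSON, NDJSON or CSV) into an update file that the index scheduler consumes
+    // when the registered task is processed.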
+    let mut buffer = Vec::new();
+    while let Some(bytes) = body.next().await {
+        buffer.extend_from_slice(&bytes?);
+    }
+    if buffer.is_empty() {
+        return Err(MeilisearchHttpError::MissingPayload(format));
+    }
+    let reader = Cursor::new(buffer);
+
+    let documents_count =
+        tokio::task::spawn_blocking(move || -> Result<_, MeilisearchHttpError> {
+            let documents_count = match format {
+                PayloadType::Json => read_json(reader, update_file.as_file_mut())?,
+                PayloadType::Csv => read_csv(reader, update_file.as_file_mut())?,
+                PayloadType::Ndjson => read_ndjson(reader, update_file.as_file_mut())?,
+            };
+            // we NEED to persist the file here because we moved the `update_file` into another task.
+            update_file.persist()?;
+            Ok(documents_count)
+        })
+        .await;
+
+    let documents_count = match documents_count {
+        Ok(Ok(documents_count)) => documents_count as u64,
+        // in this case the file cannot have been persisted.
+        Ok(Err(e)) => return Err(e),
+        Err(e) => {
+            // Here the file MAY or MAY NOT have been persisted.
+            // We don't know, so we ignore the file-not-found error.
+            match index_scheduler.delete_update_file(uuid) {
+                Ok(()) => (),
+                Err(index_scheduler::Error::FileStore(file_store::Error::IoError(e)))
+                    if e.kind() == ErrorKind::NotFound => {}
+                Err(e) => {
+                    log::warn!("Unknown error happened while deleting a malformed update file with uuid {uuid}: {e}");
+                }
+            }
+            // We still want to return the original error to the end user.
+            return Err(e.into());
+        }
     };
-    let task = meilisearch.register_update(index_uid, update).await?.into();
+    let task = KindWithContent::DocumentAdditionOrUpdate {
+        method,
+        content_file: uuid,
+        documents_count,
+        primary_key,
+        allow_index_creation,
+        index_uid,
+    };
+
+    let scheduler = index_scheduler.clone();
+    let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
+        Ok(task) => task,
+        Err(e) => {
+            index_scheduler.delete_update_file(uuid)?;
+            return Err(e.into());
+        }
+    };
 
     debug!("returns: {:?}", task);
-    Ok(task)
+    Ok(task.into())
 }
 
 pub async fn delete_documents(
-    meilisearch: GuardedData, MeiliSearch>,
+    index_scheduler: GuardedData, Data>,
     path: web::Path,
     body: web::Json>,
+    req: HttpRequest,
+    analytics: web::Data,
 ) -> Result {
     debug!("called with params: {:?}", body);
+
+    analytics.delete_documents(DocumentDeletionKind::PerBatch, &req);
+
     let ids = body
         .iter()
-        .map(|v| {
-            v.as_str()
-                .map(String::from)
-                .unwrap_or_else(|| v.to_string())
-        })
+        .map(|v| v.as_str().map(String::from).unwrap_or_else(|| v.to_string()))
         .collect();
-    let update = Update::DeleteDocuments(ids);
-    let task: SummarizedTaskView = meilisearch
-        .register_update(path.into_inner(), update)
-        .await?
-        .into();
+    let task =
+        KindWithContent::DocumentDeletion { index_uid: path.into_inner(), documents_ids: ids };
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
 }
 
 pub async fn clear_all_documents(
-    meilisearch: GuardedData, MeiliSearch>,
+    index_scheduler: GuardedData, Data>,
     path: web::Path,
+    req: HttpRequest,
+    analytics: web::Data,
 ) -> Result {
-    let update = Update::ClearDocuments;
-    let task: SummarizedTaskView = meilisearch
-        .register_update(path.into_inner(), update)
-        .await?
- .into(); + analytics.delete_documents(DocumentDeletionKind::ClearAll, &req); + + let task = KindWithContent::DocumentClear { index_uid: path.into_inner() }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } + +fn all_documents<'a>( + index: &Index, + rtxn: &'a RoTxn, +) -> Result> + 'a, ResponseError> { + let fields_ids_map = index.fields_ids_map(rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + + Ok(index.all_documents(rtxn)?.map(move |ret| { + ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> { + Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?) + }) + })) +} + +fn retrieve_documents>( + index: &Index, + offset: usize, + limit: usize, + attributes_to_retrieve: Option>, +) -> Result<(u64, Vec), ResponseError> { + let rtxn = index.read_txn()?; + + let mut documents = Vec::new(); + for document in all_documents(index, &rtxn)?.skip(offset).take(limit) { + let document = match &attributes_to_retrieve { + Some(attributes_to_retrieve) => permissive_json_pointer::select_values( + &document?, + attributes_to_retrieve.iter().map(|s| s.as_ref()), + ), + None => document?, + }; + documents.push(document); + } + + let number_of_documents = index.number_of_documents(&rtxn)?; + Ok((number_of_documents, documents)) +} + +fn retrieve_document>( + index: &Index, + doc_id: &str, + attributes_to_retrieve: Option>, +) -> Result { + let txn = index.read_txn()?; + + let fields_ids_map = index.fields_ids_map(&txn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + + let internal_id = index + .external_documents_ids(&txn)? + .get(doc_id.as_bytes()) + .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?; + + let document = index + .documents(&txn, std::iter::once(internal_id))? 
+ .into_iter() + .next() + .map(|(_, d)| d) + .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?; + + let document = meilisearch_types::milli::obkv_to_json(&all_fields, &fields_ids_map, document)?; + let document = match &attributes_to_retrieve { + Some(attributes_to_retrieve) => permissive_json_pointer::select_values( + &document, + attributes_to_retrieve.iter().map(|s| s.as_ref()), + ), + None => document, + }; + + Ok(document) +} diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index 3fa0adba8..e8fca0cf8 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -1,18 +1,20 @@ +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; +use index_scheduler::IndexScheduler; use log::debug; -use meilisearch_lib::index_controller::Update; -use meilisearch_lib::MeiliSearch; use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::milli::{self, FieldDistribution, Index}; +use meilisearch_types::tasks::KindWithContent; use serde::{Deserialize, Serialize}; use serde_json::json; use time::OffsetDateTime; +use super::{Pagination, SummarizedTaskView}; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::{AuthenticationError, GuardedData}; use crate::extractors::sequential_extractor::SeqHandler; -use crate::task::SummarizedTaskView; - -use super::Pagination; pub mod documents; pub mod search; @@ -39,19 +41,42 @@ pub fn configure(cfg: &mut web::ServiceConfig) { ); } +#[derive(Debug, Serialize, Deserialize, Clone)] +#[serde(rename_all = "camelCase")] +pub struct IndexView { + pub uid: String, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, + pub primary_key: Option, +} + +impl IndexView { + fn new(uid: String, index: &Index) -> Result { + let rtxn = index.read_txn()?; + Ok(IndexView { + uid, + created_at: index.created_at(&rtxn)?, + updated_at: index.updated_at(&rtxn)?, + primary_key: index.primary_key(&rtxn)?.map(String::from), + }) + } +} + pub async fn list_indexes( - data: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, paginate: web::Query, ) -> Result { - let search_rules = &data.filters().search_rules; - let indexes: Vec<_> = data.list_indexes().await?; - let nb_indexes = indexes.len(); - let iter = indexes + let search_rules = &index_scheduler.filters().search_rules; + let indexes: Vec<_> = index_scheduler.indexes()?; + let indexes = indexes .into_iter() - .filter(|i| search_rules.is_index_authorized(&i.uid)); - let ret = paginate - .into_inner() - .auto_paginate_unsized(nb_indexes, iter); + .filter(|(name, _)| search_rules.is_index_authorized(name)) + .map(|(name, index)| IndexView::new(name, &index)) + .collect::, _>>()?; + + let ret = paginate.auto_paginate_sized(indexes.into_iter()); debug!("returns: {:?}", ret); Ok(HttpResponse::Ok().json(ret)) @@ -65,16 +90,15 @@ pub struct IndexCreateRequest { } pub async fn create_index( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, body: web::Json, req: HttpRequest, analytics: web::Data, ) -> Result { - let IndexCreateRequest { - primary_key, uid, .. 
- } = body.into_inner(); + let IndexCreateRequest { primary_key, uid } = body.into_inner(); + let uid = IndexUid::try_from(uid)?.into_inner(); - let allow_index_creation = meilisearch.filters().search_rules.is_index_authorized(&uid); + let allow_index_creation = index_scheduler.filters().search_rules.is_index_authorized(&uid); if allow_index_creation { analytics.publish( "Index Created".to_string(), @@ -82,8 +106,9 @@ pub async fn create_index( Some(&req), ); - let update = Update::CreateIndex { primary_key }; - let task: SummarizedTaskView = meilisearch.register_update(uid, update).await?.into(); + let task = KindWithContent::IndexCreation { index_uid: uid, primary_key }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); Ok(HttpResponse::Accepted().json(task)) } else { @@ -99,30 +124,20 @@ pub struct UpdateIndexRequest { primary_key: Option, } -#[derive(Debug, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct UpdateIndexResponse { - name: String, - uid: String, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - created_at: OffsetDateTime, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - updated_at: OffsetDateTime, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - primary_key: OffsetDateTime, -} - pub async fn get_index( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, ) -> Result { - let meta = meilisearch.get_index(path.into_inner()).await?; - debug!("returns: {:?}", meta); - Ok(HttpResponse::Ok().json(meta)) + let index = index_scheduler.index(&index_uid)?; + let index_view = IndexView::new(index_uid.into_inner(), &index)?; + + debug!("returns: {:?}", index_view); + + Ok(HttpResponse::Ok().json(index_view)) } pub async fn update_index( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, path: web::Path, body: web::Json, req: HttpRequest, @@ -136,43 +151,64 @@ pub async fn update_index( Some(&req), ); - let update = Update::UpdateIndex { + let task = KindWithContent::IndexUpdate { + index_uid: path.into_inner(), primary_key: body.primary_key, }; - let task: SummarizedTaskView = meilisearch - .register_update(path.into_inner(), update) - .await? 
- .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } pub async fn delete_index( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, ) -> Result { - let uid = path.into_inner(); - let update = Update::DeleteIndex; - let task: SummarizedTaskView = meilisearch.register_update(uid, update).await?.into(); + let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() }; + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); Ok(HttpResponse::Accepted().json(task)) } pub async fn get_index_stats( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, req: HttpRequest, analytics: web::Data, ) -> Result { - analytics.publish( - "Stats Seen".to_string(), - json!({ "per_index_uid": true }), - Some(&req), - ); - let response = meilisearch.get_index_stats(path.into_inner()).await?; + analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": true }), Some(&req)); - debug!("returns: {:?}", response); - Ok(HttpResponse::Ok().json(response)) + let stats = IndexStats::new((*index_scheduler).clone(), index_uid.into_inner())?; + + debug!("returns: {:?}", stats); + Ok(HttpResponse::Ok().json(stats)) +} + +#[derive(Serialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct IndexStats { + pub number_of_documents: u64, + pub is_indexing: bool, + pub field_distribution: FieldDistribution, +} + +impl IndexStats { + pub fn new( + index_scheduler: Data, + index_uid: String, + ) -> Result { + // we check if there is currently a task processing associated with this index. 
+ let is_processing = index_scheduler.is_index_processing(&index_uid)?; + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.read_txn()?; + Ok(IndexStats { + number_of_documents: index.number_of_documents(&rtxn)?, + is_indexing: is_processing, + field_distribution: index.field_distribution(&rtxn)?, + }) + } } diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index 973d5eb6e..af5da44a0 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -1,19 +1,22 @@ +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; +use index_scheduler::IndexScheduler; use log::debug; use meilisearch_auth::IndexSearchRules; -use meilisearch_lib::index::{ - MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, -}; -use meilisearch_lib::MeiliSearch; use meilisearch_types::error::ResponseError; use serde::Deserialize; use serde_cs::vec::CS; use serde_json::Value; use crate::analytics::{Analytics, SearchAggregator}; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::search::{ + perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEARCH_OFFSET, +}; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service( @@ -27,8 +30,12 @@ pub fn configure(cfg: &mut web::ServiceConfig) { #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchQueryGet { q: Option, - offset: Option, - limit: Option, + #[serde(default = "DEFAULT_SEARCH_OFFSET")] + offset: usize, + #[serde(default = "DEFAULT_SEARCH_LIMIT")] + limit: usize, + page: Option, + hits_per_page: Option, attributes_to_retrieve: Option>, attributes_to_crop: Option>, #[serde(default = "DEFAULT_CROP_LENGTH")] @@ -62,15 +69,13 @@ impl From for SearchQuery { Self { q: other.q, offset: other.offset, - limit: other.limit.unwrap_or_else(DEFAULT_SEARCH_LIMIT), - attributes_to_retrieve: other - .attributes_to_retrieve - .map(|o| o.into_iter().collect()), + limit: other.limit, + page: other.page, + hits_per_page: other.hits_per_page, + attributes_to_retrieve: other.attributes_to_retrieve.map(|o| o.into_iter().collect()), attributes_to_crop: other.attributes_to_crop.map(|o| o.into_iter().collect()), crop_length: other.crop_length, - attributes_to_highlight: other - .attributes_to_highlight - .map(|o| o.into_iter().collect()), + attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), filter, sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), show_matches_position: other.show_matches_position, @@ -129,8 +134,8 @@ fn fix_sort_query_parameters(sort_query: &str) -> Vec { } pub async fn search_with_url_query( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, params: web::Query, req: HttpRequest, analytics: web::Data, @@ -138,19 +143,17 @@ pub async fn search_with_url_query( debug!("called with params: {:?}", params); let mut query: SearchQuery = params.into_inner().into(); - let index_uid = path.into_inner(); // Tenant token search_rules. 
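     // For example, a tenant token generated with search rules such as
     // `{"<index>": {"filter": "user_id = 1"}}` has that filter merged into the query below,
     // so the token holder can only ever search within its allowed subset of documents.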
- if let Some(search_rules) = meilisearch - .filters() - .search_rules - .get_index_search_rules(&index_uid) + if let Some(search_rules) = + index_scheduler.filters().search_rules.get_index_search_rules(&index_uid) { add_search_rules(&mut query, search_rules); } let mut aggregate = SearchAggregator::from_query(&query, &req); - let search_result = meilisearch.search(index_uid, query).await; + let index = index_scheduler.index(&index_uid)?; + let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } @@ -163,8 +166,8 @@ pub async fn search_with_url_query( } pub async fn search_with_post( - meilisearch: GuardedData, MeiliSearch>, - path: web::Path, + index_scheduler: GuardedData, Data>, + index_uid: web::Path, params: web::Json, req: HttpRequest, analytics: web::Data, @@ -172,19 +175,17 @@ pub async fn search_with_post( let mut query = params.into_inner(); debug!("search called with params: {:?}", query); - let index_uid = path.into_inner(); // Tenant token search_rules. - if let Some(search_rules) = meilisearch - .filters() - .search_rules - .get_index_search_rules(&index_uid) + if let Some(search_rules) = + index_scheduler.filters().search_rules.get_index_search_rules(&index_uid) { add_search_rules(&mut query, search_rules); } let mut aggregate = SearchAggregator::from_query(&query, &req); - let search_result = meilisearch.search(index_uid, query).await; + let index = index_scheduler.index(&index_uid)?; + let search_result = tokio::task::spawn_blocking(move || perform_search(&index, query)).await?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } @@ -205,13 +206,7 @@ mod test { let sort = fix_sort_query_parameters("_geoPoint(12, 13):asc"); assert_eq!(sort, vec!["_geoPoint(12,13):asc".to_string()]); let sort = fix_sort_query_parameters("doggo:asc,_geoPoint(12.45,13.56):desc"); - assert_eq!( - sort, - vec![ - "doggo:asc".to_string(), - "_geoPoint(12.45,13.56):desc".to_string(), - ] - ); + assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(12.45,13.56):desc".to_string(),]); let sort = fix_sort_query_parameters( "doggo:asc , _geoPoint(12.45, 13.56, 2590352):desc , catto:desc", ); @@ -225,12 +220,6 @@ mod test { ); let sort = fix_sort_query_parameters("doggo:asc , _geoPoint(1, 2), catto:desc"); // This is ugly but eh, I don't want to write a full parser just for this unused route - assert_eq!( - sort, - vec![ - "doggo:asc".to_string(), - "_geoPoint(1,2),catto:desc".to_string(), - ] - ); + assert_eq!(sort, vec!["doggo:asc".to_string(), "_geoPoint(1,2),catto:desc".to_string(),]); } } diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index bc8642def..d2508a3c8 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -1,58 +1,68 @@ -use log::debug; - +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; -use meilisearch_lib::index::{Settings, Unchecked}; -use meilisearch_lib::index_controller::Update; -use meilisearch_lib::MeiliSearch; +use index_scheduler::IndexScheduler; +use log::debug; use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::settings::{settings, Settings, Unchecked}; +use meilisearch_types::tasks::KindWithContent; use serde_json::json; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, 
GuardedData}; -use crate::task::SummarizedTaskView; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; +use crate::routes::SummarizedTaskView; #[macro_export] macro_rules! make_setting_route { ($route:literal, $update_verb:ident, $type:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => { pub mod $attr { + use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse, Resource}; + use index_scheduler::IndexScheduler; use log::debug; - - use meilisearch_lib::milli::update::Setting; - use meilisearch_lib::{index::Settings, index_controller::Update, MeiliSearch}; - use meilisearch_types::error::ResponseError; + use meilisearch_types::index_uid::IndexUid; + use meilisearch_types::milli::update::Setting; + use meilisearch_types::settings::{settings, Settings}; + use meilisearch_types::tasks::KindWithContent; use $crate::analytics::Analytics; - use $crate::extractors::authentication::{policies::*, GuardedData}; + use $crate::extractors::authentication::policies::*; + use $crate::extractors::authentication::GuardedData; use $crate::extractors::sequential_extractor::SeqHandler; - use $crate::task::SummarizedTaskView; + use $crate::routes::SummarizedTaskView; pub async fn delete( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData< + ActionPolicy<{ actions::SETTINGS_UPDATE }>, + Data, + >, index_uid: web::Path, ) -> Result { - let settings = Settings { - $attr: Setting::Reset, - ..Default::default() - }; + let new_settings = Settings { $attr: Setting::Reset, ..Default::default() }; - let allow_index_creation = meilisearch.filters().allow_index_creation; - let update = Update::Settings { - settings, + let allow_index_creation = index_scheduler.filters().allow_index_creation; + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), is_deletion: true, allow_index_creation, }; - let task: SummarizedTaskView = meilisearch - .register_update(index_uid.into_inner(), update) - .await? - .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)) + .await?? + .into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } pub async fn update( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData< + ActionPolicy<{ actions::SETTINGS_UPDATE }>, + Data, + >, index_uid: actix_web::web::Path, body: actix_web::web::Json>, req: HttpRequest, @@ -62,7 +72,7 @@ macro_rules! make_setting_route { $analytics(&body, &req); - let settings = Settings { + let new_settings = Settings { $attr: match body { Some(inner_body) => Setting::Set(inner_body), None => Setting::Reset, @@ -70,26 +80,34 @@ macro_rules! make_setting_route { ..Default::default() }; - let allow_index_creation = meilisearch.filters().allow_index_creation; - let update = Update::Settings { - settings, + let allow_index_creation = index_scheduler.filters().allow_index_creation; + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), is_deletion: false, allow_index_creation, }; - let task: SummarizedTaskView = meilisearch - .register_update(index_uid.into_inner(), update) - .await? - .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)) + .await?? 
+ .into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } pub async fn get( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData< + ActionPolicy<{ actions::SETTINGS_GET }>, + Data, + >, index_uid: actix_web::web::Path, ) -> std::result::Result { - let settings = meilisearch.settings(index_uid.into_inner()).await?; + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.read_txn()?; + let settings = settings(&index, &rtxn)?; + debug!("returns: {:?}", settings); let mut json = serde_json::json!(&settings); let val = json[$camelcase_attr].take(); @@ -105,17 +123,6 @@ macro_rules! make_setting_route { } } }; - ($route:literal, $update_verb:ident, $type:ty, $attr:ident, $camelcase_attr:literal) => { - make_setting_route!( - $route, - $update_verb, - $type, - $attr, - $camelcase_attr, - _analytics, - |_, _| {} - ); - }; } make_setting_route!( @@ -169,17 +176,32 @@ make_setting_route!( put, Vec, displayed_attributes, - "displayedAttributes" + "displayedAttributes", + analytics, + |displayed: &Option>, req: &HttpRequest| { + use serde_json::json; + + analytics.publish( + "DisplayedAttributes Updated".to_string(), + json!({ + "displayed_attributes": { + "total": displayed.as_ref().map(|displayed| displayed.len()), + "with_wildcard": displayed.as_ref().map(|displayed| displayed.iter().any(|displayed| displayed == "*")), + }, + }), + Some(req), + ); + } ); make_setting_route!( "/typo-tolerance", patch, - meilisearch_lib::index::updates::TypoSettings, + meilisearch_types::settings::TypoSettings, typo_tolerance, "typoTolerance", analytics, - |setting: &Option, req: &HttpRequest| { + |setting: &Option, req: &HttpRequest| { use serde_json::json; analytics.publish( @@ -229,6 +251,7 @@ make_setting_route!( json!({ "searchable_attributes": { "total": setting.as_ref().map(|searchable| searchable.len()), + "with_wildcard": setting.as_ref().map(|searchable| searchable.iter().any(|searchable| searchable == "*")), }, }), Some(req), @@ -241,7 +264,21 @@ make_setting_route!( put, std::collections::BTreeSet, stop_words, - "stopWords" + "stopWords", + analytics, + |stop_words: &Option>, req: &HttpRequest| { + use serde_json::json; + + analytics.publish( + "StopWords Updated".to_string(), + json!({ + "stop_words": { + "total": stop_words.as_ref().map(|stop_words| stop_words.len()), + }, + }), + Some(req), + ); + } ); make_setting_route!( @@ -249,7 +286,21 @@ make_setting_route!( put, std::collections::BTreeMap>, synonyms, - "synonyms" + "synonyms", + analytics, + |synonyms: &Option>>, req: &HttpRequest| { + use serde_json::json; + + analytics.publish( + "Synonyms Updated".to_string(), + json!({ + "synonyms": { + "total": synonyms.as_ref().map(|synonyms| synonyms.len()), + }, + }), + Some(req), + ); + } ); make_setting_route!( @@ -257,7 +308,20 @@ make_setting_route!( put, String, distinct_attribute, - "distinctAttribute" + "distinctAttribute", + analytics, + |distinct: &Option, req: &HttpRequest| { + use serde_json::json; + analytics.publish( + "DistinctAttribute Updated".to_string(), + json!({ + "distinct_attribute": { + "set": distinct.is_some(), + } + }), + Some(req), + ); + } ); make_setting_route!( @@ -274,7 +338,13 @@ make_setting_route!( "RankingRules Updated".to_string(), json!({ "ranking_rules": { - "sort_position": setting.as_ref().map(|sort| sort.iter().position(|s| s == "sort")), + "words_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "words")), + "typo_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "typo")), + 
"proximity_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "proximity")), + "attribute_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "attribute")), + "sort_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "sort")), + "exactness_position": setting.as_ref().map(|rr| rr.iter().position(|s| s == "exactness")), + "values": setting.as_ref().map(|rr| rr.iter().filter(|s| !s.contains(':')).cloned().collect::>().join(", ")), } }), Some(req), @@ -285,11 +355,11 @@ make_setting_route!( make_setting_route!( "/faceting", patch, - meilisearch_lib::index::updates::FacetingSettings, + meilisearch_types::settings::FacetingSettings, faceting, "faceting", analytics, - |setting: &Option, req: &HttpRequest| { + |setting: &Option, req: &HttpRequest| { use serde_json::json; analytics.publish( @@ -307,11 +377,11 @@ make_setting_route!( make_setting_route!( "/pagination", patch, - meilisearch_lib::index::updates::PaginationSettings, + meilisearch_types::settings::PaginationSettings, pagination, "pagination", analytics, - |setting: &Option, req: &HttpRequest| { + |setting: &Option, req: &HttpRequest| { use serde_json::json; analytics.publish( @@ -355,46 +425,60 @@ generate_configure!( ); pub async fn update_all( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, index_uid: web::Path, body: web::Json>, req: HttpRequest, analytics: web::Data, ) -> Result { - let settings = body.into_inner(); + let new_settings = body.into_inner(); analytics.publish( "Settings Updated".to_string(), json!({ "ranking_rules": { - "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")), + "words_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "words")), + "typo_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "typo")), + "proximity_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "proximity")), + "attribute_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "attribute")), + "sort_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "sort")), + "exactness_position": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().position(|s| s == "exactness")), + "values": new_settings.ranking_rules.as_ref().set().map(|rr| rr.iter().filter(|s| !s.contains(':')).cloned().collect::>().join(", ")), }, "searchable_attributes": { - "total": settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), + "total": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.len()), + "with_wildcard": new_settings.searchable_attributes.as_ref().set().map(|searchable| searchable.iter().any(|searchable| searchable == "*")), + }, + "displayed_attributes": { + "total": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.len()), + "with_wildcard": new_settings.displayed_attributes.as_ref().set().map(|displayed| displayed.iter().any(|displayed| displayed == "*")), }, "sortable_attributes": { - "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), - "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), + "total": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), + "has_geo": new_settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")), }, 
"filterable_attributes": { - "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), - "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), + "total": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), + "has_geo": new_settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")), + }, + "distinct_attribute": { + "set": new_settings.distinct_attribute.as_ref().set().is_some() }, "typo_tolerance": { - "enabled": settings.typo_tolerance + "enabled": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.enabled.as_ref().set()) .copied(), - "disable_on_attributes": settings.typo_tolerance + "disable_on_attributes": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.disable_on_attributes.as_ref().set().map(|m| !m.is_empty())), - "disable_on_words": settings.typo_tolerance + "disable_on_words": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.disable_on_words.as_ref().set().map(|m| !m.is_empty())), - "min_word_size_for_one_typo": settings.typo_tolerance + "min_word_size_for_one_typo": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.min_word_size_for_typos @@ -402,7 +486,7 @@ pub async fn update_all( .set() .map(|s| s.one_typo.set())) .flatten(), - "min_word_size_for_two_typos": settings.typo_tolerance + "min_word_size_for_two_typos": new_settings.typo_tolerance .as_ref() .set() .and_then(|s| s.min_word_size_for_typos @@ -412,61 +496,69 @@ pub async fn update_all( .flatten(), }, "faceting": { - "max_values_per_facet": settings.faceting + "max_values_per_facet": new_settings.faceting .as_ref() .set() .and_then(|s| s.max_values_per_facet.as_ref().set()), }, "pagination": { - "max_total_hits": settings.pagination + "max_total_hits": new_settings.pagination .as_ref() .set() .and_then(|s| s.max_total_hits.as_ref().set()), }, + "stop_words": { + "total": new_settings.stop_words.as_ref().set().map(|stop_words| stop_words.len()), + }, + "synonyms": { + "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), + }, }), Some(&req), ); - let allow_index_creation = meilisearch.filters().allow_index_creation; - let update = Update::Settings { - settings, + let allow_index_creation = index_scheduler.filters().allow_index_creation; + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), is_deletion: false, allow_index_creation, }; - let task: SummarizedTaskView = meilisearch - .register_update(index_uid.into_inner(), update) - .await? 
- .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) } pub async fn get_all( - data: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, index_uid: web::Path, ) -> Result { - let settings = data.settings(index_uid.into_inner()).await?; - debug!("returns: {:?}", settings); - Ok(HttpResponse::Ok().json(settings)) + let index = index_scheduler.index(&index_uid)?; + let rtxn = index.read_txn()?; + let new_settings = settings(&index, &rtxn)?; + debug!("returns: {:?}", new_settings); + Ok(HttpResponse::Ok().json(new_settings)) } pub async fn delete_all( - data: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, index_uid: web::Path, ) -> Result { - let settings = Settings::cleared().into_unchecked(); + let new_settings = Settings::cleared().into_unchecked(); - let allow_index_creation = data.filters().allow_index_creation; - let update = Update::Settings { - settings, + let allow_index_creation = index_scheduler.filters().allow_index_creation; + let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner(); + let task = KindWithContent::SettingsUpdate { + index_uid, + new_settings: Box::new(new_settings), is_deletion: true, allow_index_creation, }; - let task: SummarizedTaskView = data - .register_update(index_uid.into_inner(), update) - .await? - .into(); + let task: SummarizedTaskView = + tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into(); debug!("returns: {:?}", task); Ok(HttpResponse::Accepted().json(task)) diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 6a673f600..9fcb1c4b7 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -1,22 +1,27 @@ -use actix_web::{web, HttpRequest, HttpResponse}; -use log::debug; -use serde::{Deserialize, Serialize}; +use std::collections::BTreeMap; +use actix_web::web::Data; +use actix_web::{web, HttpRequest, HttpResponse}; +use index_scheduler::{IndexScheduler, Query}; +use log::debug; +use meilisearch_types::error::ResponseError; +use meilisearch_types::settings::{Settings, Unchecked}; +use meilisearch_types::star_or::StarOr; +use meilisearch_types::tasks::{Kind, Status, Task, TaskId}; +use serde::{Deserialize, Serialize}; use serde_json::json; use time::OffsetDateTime; -use meilisearch_lib::index::{Settings, Unchecked}; -use meilisearch_lib::MeiliSearch; -use meilisearch_types::error::ResponseError; -use meilisearch_types::star_or::StarOr; - +use self::indexes::IndexStats; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; mod api_key; mod dump; pub mod indexes; -mod tasks; +mod swap_indexes; +pub mod tasks; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::scope("/tasks").configure(tasks::configure)) @@ -25,7 +30,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::scope("/dumps").configure(dump::configure)) .service(web::resource("/stats").route(web::get().to(get_stats))) .service(web::resource("/version").route(web::get().to(get_version))) - .service(web::scope("/indexes").configure(indexes::configure)); + .service(web::scope("/indexes").configure(indexes::configure)) + .service(web::scope("/swap-indexes").configure(swap_indexes::configure)); } /// Extracts the raw values from 
the `StarOr` types and @@ -45,6 +51,30 @@ where const PAGINATION_DEFAULT_LIMIT: fn() -> usize = || 20; +#[derive(Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct SummarizedTaskView { + task_uid: TaskId, + index_uid: Option, + status: Status, + #[serde(rename = "type")] + kind: Kind, + #[serde(serialize_with = "time::serde::rfc3339::serialize")] + enqueued_at: OffsetDateTime, +} + +impl From for SummarizedTaskView { + fn from(task: Task) -> Self { + SummarizedTaskView { + task_uid: task.uid, + index_uid: task.index_uid().map(|s| s.to_string()), + status: task.status, + kind: task.kind.as_kind(), + enqueued_at: task.enqueued_at, + } + } +} + #[derive(Debug, Clone, Copy, Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct Pagination { @@ -72,11 +102,7 @@ impl Pagination { T: Serialize, { let total = content.len(); - let content: Vec<_> = content - .into_iter() - .skip(self.offset) - .take(self.limit) - .collect(); + let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect(); self.format_with(total, content) } @@ -89,11 +115,7 @@ impl Pagination { where T: Serialize, { - let content: Vec<_> = content - .into_iter() - .skip(self.offset) - .take(self.limit) - .collect(); + let content: Vec<_> = content.into_iter().skip(self.offset).take(self.limit).collect(); self.format_with(total, content) } @@ -103,23 +125,13 @@ impl Pagination { where T: Serialize, { - PaginationView { - results, - offset: self.offset, - limit: self.limit, - total, - } + PaginationView { results, offset: self.offset, limit: self.limit, total } } } impl PaginationView { pub fn new(offset: usize, limit: usize, total: usize, results: Vec) -> Self { - Self { - offset, - limit, - results, - total, - } + Self { offset, limit, results, total } } } @@ -181,10 +193,7 @@ pub struct EnqueuedUpdateResult { pub update_type: UpdateType, #[serde(with = "time::serde::rfc3339")] pub enqueued_at: OffsetDateTime, - #[serde( - skip_serializing_if = "Option::is_none", - with = "time::serde::rfc3339::option" - )] + #[serde(skip_serializing_if = "Option::is_none", with = "time::serde::rfc3339::option")] pub started_processing_at: Option, } @@ -231,21 +240,62 @@ pub async fn running() -> HttpResponse { HttpResponse::Ok().json(serde_json::json!({ "status": "Meilisearch is running" })) } +#[derive(Serialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct Stats { + pub database_size: u64, + #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] + pub last_update: Option, + pub indexes: BTreeMap, +} + async fn get_stats( - meilisearch: GuardedData, MeiliSearch>, + index_scheduler: GuardedData, Data>, req: HttpRequest, analytics: web::Data, ) -> Result { - analytics.publish( - "Stats Seen".to_string(), - json!({ "per_index_uid": false }), - Some(&req), - ); - let search_rules = &meilisearch.filters().search_rules; - let response = meilisearch.get_all_stats(search_rules).await?; + analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req)); + let search_rules = &index_scheduler.filters().search_rules; - debug!("returns: {:?}", response); - Ok(HttpResponse::Ok().json(response)) + let stats = create_all_stats((*index_scheduler).clone(), search_rules)?; + + debug!("returns: {:?}", stats); + Ok(HttpResponse::Ok().json(stats)) +} + +pub fn create_all_stats( + index_scheduler: Data, + search_rules: &meilisearch_auth::SearchRules, +) -> Result { + let mut last_task: Option = None; + let mut indexes = BTreeMap::new(); + let mut 
database_size = 0; + let processing_task = index_scheduler.get_tasks_from_authorized_indexes( + Query { statuses: Some(vec![Status::Processing]), limit: Some(1), ..Query::default() }, + search_rules.authorized_indexes(), + )?; + let processing_index = processing_task.first().and_then(|task| task.index_uid()); + for (name, index) in index_scheduler.indexes()? { + if !search_rules.is_index_authorized(&name) { + continue; + } + + database_size += index.on_disk_size()?; + + let rtxn = index.read_txn()?; + let stats = IndexStats { + number_of_documents: index.number_of_documents(&rtxn)?, + is_indexing: processing_index.map_or(false, |index_name| name == index_name), + field_distribution: index.field_distribution(&rtxn)?, + }; + + let updated_at = index.updated_at(&rtxn)?; + last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at))); + + indexes.insert(name, stats); + } + let stats = Stats { database_size, last_update: last_task, indexes }; + Ok(stats) } #[derive(Serialize)] @@ -257,8 +307,12 @@ struct VersionResponse { } async fn get_version( - _meilisearch: GuardedData, MeiliSearch>, + _index_scheduler: GuardedData, Data>, + req: HttpRequest, + analytics: web::Data, ) -> HttpResponse { + analytics.publish("Version Seen".to_string(), json!(null), Some(&req)); + let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); @@ -275,6 +329,11 @@ struct KeysResponse { public: Option, } -pub async fn get_health() -> Result { +pub async fn get_health( + req: HttpRequest, + analytics: web::Data, +) -> Result { + analytics.health_seen(&req); + Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" }))) } diff --git a/meilisearch-http/src/routes/swap_indexes.rs b/meilisearch-http/src/routes/swap_indexes.rs new file mode 100644 index 000000000..bc0c1705a --- /dev/null +++ b/meilisearch-http/src/routes/swap_indexes.rs @@ -0,0 +1,59 @@ +use actix_web::web::Data; +use actix_web::{web, HttpRequest, HttpResponse}; +use index_scheduler::IndexScheduler; +use meilisearch_types::error::ResponseError; +use meilisearch_types::tasks::{IndexSwap, KindWithContent}; +use serde::Deserialize; +use serde_json::json; + +use super::SummarizedTaskView; +use crate::analytics::Analytics; +use crate::error::MeilisearchHttpError; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::{AuthenticationError, GuardedData}; +use crate::extractors::sequential_extractor::SeqHandler; + +pub fn configure(cfg: &mut web::ServiceConfig) { + cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes)))); +} +#[derive(Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct SwapIndexesPayload { + indexes: Vec, +} + +pub async fn swap_indexes( + index_scheduler: GuardedData, Data>, + params: web::Json>, + req: HttpRequest, + analytics: web::Data, +) -> Result { + analytics.publish( + "Indexes Swapped".to_string(), + json!({ + "swap_operation_number": params.len(), + }), + Some(&req), + ); + let search_rules = &index_scheduler.filters().search_rules; + + let mut swaps = vec![]; + for SwapIndexesPayload { indexes } in params.into_inner().into_iter() { + let (lhs, rhs) = match indexes.as_slice() { + [lhs, rhs] => (lhs, rhs), + _ => { + return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into()); + } + }; + if !search_rules.is_index_authorized(lhs) || !search_rules.is_index_authorized(rhs) { + 
return Err(AuthenticationError::InvalidToken.into()); + } + swaps.push(IndexSwap { indexes: (lhs.clone(), rhs.clone()) }); + } + + let task = KindWithContent::IndexSwap { swaps }; + + let task = index_scheduler.register(task)?; + let task: SummarizedTaskView = task.into(); + Ok(HttpResponse::Accepted().json(task)) +} diff --git a/meilisearch-http/src/routes/tasks.rs b/meilisearch-http/src/routes/tasks.rs index 50902485b..914315711 100644 --- a/meilisearch-http/src/routes/tasks.rs +++ b/meilisearch-http/src/routes/tasks.rs @@ -1,159 +1,650 @@ +use std::str::FromStr; + +use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; -use meilisearch_lib::tasks::task::{TaskContent, TaskEvent, TaskId}; -use meilisearch_lib::tasks::TaskFilter; -use meilisearch_lib::MeiliSearch; +use index_scheduler::{IndexScheduler, Query, TaskId}; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::settings::{Settings, Unchecked}; use meilisearch_types::star_or::StarOr; -use serde::Deserialize; +use meilisearch_types::tasks::{ + serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task, +}; +use serde::{Deserialize, Serialize}; use serde_cs::vec::CS; use serde_json::json; +use time::{Duration, OffsetDateTime}; +use tokio::task; +use self::date_deserializer::{deserialize_date, DeserializeDateOption}; +use super::{fold_star_or, SummarizedTaskView}; use crate::analytics::Analytics; -use crate::extractors::authentication::{policies::*, GuardedData}; +use crate::extractors::authentication::policies::*; +use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; -use crate::task::{TaskListView, TaskStatus, TaskType, TaskView}; -use super::fold_star_or; - -const DEFAULT_LIMIT: fn() -> usize = || 20; +const DEFAULT_LIMIT: fn() -> u32 = || 20; pub fn configure(cfg: &mut web::ServiceConfig) { - cfg.service(web::resource("").route(web::get().to(SeqHandler(get_tasks)))) - .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))); + cfg.service( + web::resource("") + .route(web::get().to(SeqHandler(get_tasks))) + .route(web::delete().to(SeqHandler(delete_tasks))), + ) + .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks)))) + .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task)))); +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct TaskView { + pub uid: TaskId, + #[serde(default)] + pub index_uid: Option, + pub status: Status, + #[serde(rename = "type")] + pub kind: Kind, + pub canceled_by: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub details: Option, + pub error: Option, + #[serde(serialize_with = "serialize_duration", default)] + pub duration: Option, + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339::option", default)] + pub started_at: Option, + #[serde(with = "time::serde::rfc3339::option", default)] + pub finished_at: Option, +} + +impl TaskView { + pub fn from_task(task: &Task) -> TaskView { + TaskView { + uid: task.uid, + index_uid: task.index_uid().map(ToOwned::to_owned), + status: task.status, + kind: task.kind.as_kind(), + canceled_by: task.canceled_by, + details: task.details.clone().map(DetailsView::from), + error: task.error.clone(), + duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start), + enqueued_at: task.enqueued_at, + 
started_at: task.started_at, + finished_at: task.finished_at, + } + } +} + +#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct DetailsView { + #[serde(skip_serializing_if = "Option::is_none")] + pub received_documents: Option<u64>, + #[serde(skip_serializing_if = "Option::is_none")] + pub indexed_documents: Option<Option<u64>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub primary_key: Option<Option<String>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub provided_ids: Option<usize>, + #[serde(skip_serializing_if = "Option::is_none")] + pub deleted_documents: Option<Option<u64>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub matched_tasks: Option<u64>, + #[serde(skip_serializing_if = "Option::is_none")] + pub canceled_tasks: Option<Option<u64>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub deleted_tasks: Option<Option<u64>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub original_filter: Option<String>, + #[serde(skip_serializing_if = "Option::is_none")] + pub dump_uid: Option<Option<String>>, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(flatten)] + pub settings: Option<Box<Settings<Unchecked>>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub swaps: Option<Vec<IndexSwap>>, +} + +impl From<Details>
for DetailsView { + fn from(details: Details) -> Self { + match details { + Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => { + DetailsView { + received_documents: Some(received_documents), + indexed_documents: Some(indexed_documents), + ..DetailsView::default() + } + } + Details::SettingsUpdate { settings } => { + DetailsView { settings: Some(settings), ..DetailsView::default() } + } + Details::IndexInfo { primary_key } => { + DetailsView { primary_key: Some(primary_key), ..DetailsView::default() } + } + Details::DocumentDeletion { + provided_ids: received_document_ids, + deleted_documents, + } => DetailsView { + provided_ids: Some(received_document_ids), + deleted_documents: Some(deleted_documents), + ..DetailsView::default() + }, + Details::ClearAll { deleted_documents } => { + DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() } + } + Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => { + DetailsView { + matched_tasks: Some(matched_tasks), + canceled_tasks: Some(canceled_tasks), + original_filter: Some(original_filter), + ..DetailsView::default() + } + } + Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => { + DetailsView { + matched_tasks: Some(matched_tasks), + deleted_tasks: Some(deleted_tasks), + original_filter: Some(original_filter), + ..DetailsView::default() + } + } + Details::Dump { dump_uid } => { + DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() } + } + Details::IndexSwap { swaps } => { + DetailsView { swaps: Some(swaps), ..Default::default() } + } + } + } } #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase", deny_unknown_fields)] -pub struct TasksFilterQuery { - #[serde(rename = "type")] - type_: Option>>, - status: Option>>, - index_uid: Option>>, +pub struct TaskCommonQueryRaw { + pub uids: Option>, + pub canceled_by: Option>, + pub types: Option>>, + pub statuses: Option>>, + pub index_uids: Option>>, +} +impl TaskCommonQueryRaw { + fn validate(self) -> Result { + let Self { uids, canceled_by, types, statuses, index_uids } = self; + let uids = if let Some(uids) = uids { + Some( + uids.into_iter() + .map(|uid_string| { + uid_string.parse::().map_err(|_e| { + index_scheduler::Error::InvalidTaskUids { task_uid: uid_string }.into() + }) + }) + .collect::, ResponseError>>()?, + ) + } else { + None + }; + let canceled_by = if let Some(canceled_by) = canceled_by { + Some( + canceled_by + .into_iter() + .map(|canceled_by_string| { + canceled_by_string.parse::().map_err(|_e| { + index_scheduler::Error::InvalidTaskCanceledBy { + canceled_by: canceled_by_string, + } + .into() + }) + }) + .collect::, ResponseError>>()?, + ) + } else { + None + }; + + let types = if let Some(types) = types.and_then(fold_star_or) as Option> { + Some( + types + .into_iter() + .map(|type_string| { + Kind::from_str(&type_string).map_err(|_e| { + index_scheduler::Error::InvalidTaskTypes { type_: type_string }.into() + }) + }) + .collect::, ResponseError>>()?, + ) + } else { + None + }; + let statuses = if let Some(statuses) = + statuses.and_then(fold_star_or) as Option> + { + Some( + statuses + .into_iter() + .map(|status_string| { + Status::from_str(&status_string).map_err(|_e| { + index_scheduler::Error::InvalidTaskStatuses { status: status_string } + .into() + }) + }) + .collect::, ResponseError>>()?, + ) + } else { + None + }; + + let index_uids = + if let Some(index_uids) = index_uids.and_then(fold_star_or) as Option> { + Some( + index_uids + .into_iter() + 
.map(|index_uid_string| { + IndexUid::from_str(&index_uid_string) + .map(|index_uid| index_uid.to_string()) + .map_err(|_e| { + index_scheduler::Error::InvalidIndexUid { + index_uid: index_uid_string, + } + .into() + }) + }) + .collect::, ResponseError>>()?, + ) + } else { + None + }; + Ok(TaskCommonQuery { types, uids, canceled_by, statuses, index_uids }) + } +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct TaskDateQueryRaw { + pub after_enqueued_at: Option, + pub before_enqueued_at: Option, + pub after_started_at: Option, + pub before_started_at: Option, + pub after_finished_at: Option, + pub before_finished_at: Option, +} +impl TaskDateQueryRaw { + fn validate(self) -> Result { + let Self { + after_enqueued_at, + before_enqueued_at, + after_started_at, + before_started_at, + after_finished_at, + before_finished_at, + } = self; + + let mut query = TaskDateQuery { + after_enqueued_at: None, + before_enqueued_at: None, + after_started_at: None, + before_started_at: None, + after_finished_at: None, + before_finished_at: None, + }; + + for (field_name, string_value, before_or_after, dest) in [ + ( + "afterEnqueuedAt", + after_enqueued_at, + DeserializeDateOption::After, + &mut query.after_enqueued_at, + ), + ( + "beforeEnqueuedAt", + before_enqueued_at, + DeserializeDateOption::Before, + &mut query.before_enqueued_at, + ), + ( + "afterStartedAt", + after_started_at, + DeserializeDateOption::After, + &mut query.after_started_at, + ), + ( + "beforeStartedAt", + before_started_at, + DeserializeDateOption::Before, + &mut query.before_started_at, + ), + ( + "afterFinishedAt", + after_finished_at, + DeserializeDateOption::After, + &mut query.after_finished_at, + ), + ( + "beforeFinishedAt", + before_finished_at, + DeserializeDateOption::Before, + &mut query.before_finished_at, + ), + ] { + if let Some(string_value) = string_value { + *dest = Some(deserialize_date(field_name, &string_value, before_or_after)?); + } + } + + Ok(query) + } +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct TasksFilterQueryRaw { + #[serde(flatten)] + pub common: TaskCommonQueryRaw, #[serde(default = "DEFAULT_LIMIT")] - limit: usize, + pub limit: u32, + pub from: Option, + #[serde(flatten)] + pub dates: TaskDateQueryRaw, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct TaskDeletionOrCancelationQueryRaw { + #[serde(flatten)] + pub common: TaskCommonQueryRaw, + #[serde(flatten)] + pub dates: TaskDateQueryRaw, +} + +impl TasksFilterQueryRaw { + fn validate(self) -> Result { + let Self { common, limit, from, dates } = self; + let common = common.validate()?; + let dates = dates.validate()?; + + Ok(TasksFilterQuery { common, limit, from, dates }) + } +} + +impl TaskDeletionOrCancelationQueryRaw { + fn validate(self) -> Result { + let Self { common, dates } = self; + let common = common.validate()?; + let dates = dates.validate()?; + + Ok(TaskDeletionOrCancelationQuery { common, dates }) + } +} + +#[derive(Serialize, Debug)] +#[serde(rename_all = "camelCase", deny_unknown_fields)] +pub struct TaskDateQuery { + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize" + )] + after_enqueued_at: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize" + )] + before_enqueued_at: Option, + #[serde( + default, + 
skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize" + )] + after_started_at: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize" + )] + before_started_at: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize" + )] + after_finished_at: Option, + #[serde( + default, + skip_serializing_if = "Option::is_none", + serialize_with = "time::serde::rfc3339::option::serialize" + )] + before_finished_at: Option, +} + +#[derive(Debug)] +pub struct TaskCommonQuery { + types: Option>, + uids: Option>, + canceled_by: Option>, + statuses: Option>, + index_uids: Option>, +} + +#[derive(Debug)] +pub struct TasksFilterQuery { + limit: u32, from: Option, + common: TaskCommonQuery, + dates: TaskDateQuery, } -#[rustfmt::skip] -fn task_type_matches_content(type_: &TaskType, content: &TaskContent) -> bool { - matches!((type_, content), - (TaskType::IndexCreation, TaskContent::IndexCreation { .. }) - | (TaskType::IndexUpdate, TaskContent::IndexUpdate { .. }) - | (TaskType::IndexDeletion, TaskContent::IndexDeletion { .. }) - | (TaskType::DocumentAdditionOrUpdate, TaskContent::DocumentAddition { .. }) - | (TaskType::DocumentDeletion, TaskContent::DocumentDeletion{ .. }) - | (TaskType::SettingsUpdate, TaskContent::SettingsUpdate { .. }) - ) +#[derive(Debug)] +pub struct TaskDeletionOrCancelationQuery { + common: TaskCommonQuery, + dates: TaskDateQuery, } -#[rustfmt::skip] -fn task_status_matches_events(status: &TaskStatus, events: &[TaskEvent]) -> bool { - events.last().map_or(false, |event| { - matches!((status, event), - (TaskStatus::Enqueued, TaskEvent::Created(_)) - | (TaskStatus::Processing, TaskEvent::Processing(_) | TaskEvent::Batched { .. }) - | (TaskStatus::Succeeded, TaskEvent::Succeeded { .. }) - | (TaskStatus::Failed, TaskEvent::Failed { .. }), - ) - }) -} - -async fn get_tasks( - meilisearch: GuardedData, MeiliSearch>, - params: web::Query, +async fn cancel_tasks( + index_scheduler: GuardedData, Data>, + params: web::Query, req: HttpRequest, analytics: web::Data, ) -> Result { - let TasksFilterQuery { - type_, - status, - index_uid, - limit, - from, - } = params.into_inner(); - - let search_rules = &meilisearch.filters().search_rules; - - // We first transform a potential indexUid=* into a "not specified indexUid filter" - // for every one of the filters: type, status, and indexUid. 
- let type_: Option> = type_.and_then(fold_star_or); - let status: Option> = status.and_then(fold_star_or); - let index_uid: Option> = index_uid.and_then(fold_star_or); + let query = params.into_inner().validate()?; + let TaskDeletionOrCancelationQuery { + common: TaskCommonQuery { types, uids, canceled_by, statuses, index_uids }, + dates: + TaskDateQuery { + after_enqueued_at, + before_enqueued_at, + after_started_at, + before_started_at, + after_finished_at, + before_finished_at, + }, + } = query; analytics.publish( - "Tasks Seen".to_string(), + "Tasks Canceled".to_string(), json!({ - "filtered_by_index_uid": index_uid.as_ref().map_or(false, |v| !v.is_empty()), - "filtered_by_type": type_.as_ref().map_or(false, |v| !v.is_empty()), - "filtered_by_status": status.as_ref().map_or(false, |v| !v.is_empty()), + "filtered_by_uid": uids.is_some(), + "filtered_by_index_uid": index_uids.is_some(), + "filtered_by_type": types.is_some(), + "filtered_by_status": statuses.is_some(), + "filtered_by_canceled_by": canceled_by.is_some(), + "filtered_by_before_enqueued_at": before_enqueued_at.is_some(), + "filtered_by_after_enqueued_at": after_enqueued_at.is_some(), + "filtered_by_before_started_at": before_started_at.is_some(), + "filtered_by_after_started_at": after_started_at.is_some(), + "filtered_by_before_finished_at": before_finished_at.is_some(), + "filtered_by_after_finished_at": after_finished_at.is_some(), }), Some(&req), ); - // Then we filter on potential indexes and make sure that the search filter - // restrictions are also applied. - let indexes_filters = match index_uid { - Some(indexes) => { - let mut filters = TaskFilter::default(); - for name in indexes { - if search_rules.is_index_authorized(&name) { - filters.filter_index(name.to_string()); - } - } - Some(filters) - } - None => { - if search_rules.is_index_authorized("*") { - None - } else { - let mut filters = TaskFilter::default(); - for (index, _policy) in search_rules.clone() { - filters.filter_index(index); - } - Some(filters) - } - } + let query = Query { + limit: None, + from: None, + statuses, + types, + index_uids, + uids, + canceled_by, + before_enqueued_at, + after_enqueued_at, + before_started_at, + after_started_at, + before_finished_at, + after_finished_at, }; - // Then we complete the task filter with other potential status and types filters. 
- let filters = if type_.is_some() || status.is_some() { - let mut filters = indexes_filters.unwrap_or_default(); - filters.filter_fn(Box::new(move |task| { - let matches_type = match &type_ { - Some(types) => types - .iter() - .any(|t| task_type_matches_content(t, &task.content)), - None => true, - }; + if query.is_empty() { + return Err(index_scheduler::Error::TaskCancelationWithEmptyQuery.into()); + } - let matches_status = match &status { - Some(statuses) => statuses - .iter() - .any(|t| task_status_matches_events(t, &task.events)), - None => true, - }; + let tasks = index_scheduler.get_task_ids_from_authorized_indexes( + &index_scheduler.read_txn()?, + &query, + &index_scheduler.filters().search_rules.authorized_indexes(), + )?; + let task_cancelation = + KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks }; - matches_type && matches_status - })); - Some(filters) - } else { - indexes_filters + let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??; + let task: SummarizedTaskView = task.into(); + + Ok(HttpResponse::Ok().json(task)) +} + +async fn delete_tasks( + index_scheduler: GuardedData, Data>, + params: web::Query, + req: HttpRequest, + analytics: web::Data, +) -> Result { + let TaskDeletionOrCancelationQuery { + common: TaskCommonQuery { types, uids, canceled_by, statuses, index_uids }, + dates: + TaskDateQuery { + after_enqueued_at, + before_enqueued_at, + after_started_at, + before_started_at, + after_finished_at, + before_finished_at, + }, + } = params.into_inner().validate()?; + + analytics.publish( + "Tasks Deleted".to_string(), + json!({ + "filtered_by_uid": uids.is_some(), + "filtered_by_index_uid": index_uids.is_some(), + "filtered_by_type": types.is_some(), + "filtered_by_status": statuses.is_some(), + "filtered_by_canceled_by": canceled_by.is_some(), + "filtered_by_before_enqueued_at": before_enqueued_at.is_some(), + "filtered_by_after_enqueued_at": after_enqueued_at.is_some(), + "filtered_by_before_started_at": before_started_at.is_some(), + "filtered_by_after_started_at": after_started_at.is_some(), + "filtered_by_before_finished_at": before_finished_at.is_some(), + "filtered_by_after_finished_at": after_finished_at.is_some(), + }), + Some(&req), + ); + + let query = Query { + limit: None, + from: None, + statuses, + types, + index_uids, + uids, + canceled_by, + after_enqueued_at, + before_enqueued_at, + after_started_at, + before_started_at, + after_finished_at, + before_finished_at, }; + if query.is_empty() { + return Err(index_scheduler::Error::TaskDeletionWithEmptyQuery.into()); + } + + let tasks = index_scheduler.get_task_ids_from_authorized_indexes( + &index_scheduler.read_txn()?, + &query, + &index_scheduler.filters().search_rules.authorized_indexes(), + )?; + let task_deletion = + KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks }; + + let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??; + let task: SummarizedTaskView = task.into(); + + Ok(HttpResponse::Ok().json(task)) +} + +#[derive(Debug, Serialize)] +pub struct AllTasks { + results: Vec, + limit: u32, + from: Option, + next: Option, +} + +async fn get_tasks( + index_scheduler: GuardedData, Data>, + params: web::Query, + req: HttpRequest, + analytics: web::Data, +) -> Result { + analytics.get_tasks(¶ms, &req); + + let TasksFilterQuery { + common: TaskCommonQuery { types, uids, canceled_by, statuses, index_uids }, + limit, + from, + dates: + TaskDateQuery { + 
after_enqueued_at, + before_enqueued_at, + after_started_at, + before_started_at, + after_finished_at, + before_finished_at, + }, + } = params.into_inner().validate()?; + // We +1 just to know if there is more after this "page" or not. let limit = limit.saturating_add(1); - let mut tasks_results: Vec<_> = meilisearch - .list_tasks(filters, Some(limit), from) - .await? + let query = index_scheduler::Query { + limit: Some(limit), + from, + statuses, + types, + index_uids, + uids, + canceled_by, + before_enqueued_at, + after_enqueued_at, + before_started_at, + after_started_at, + before_finished_at, + after_finished_at, + }; + + let mut tasks_results: Vec = index_scheduler + .get_tasks_from_authorized_indexes( + query, + index_scheduler.filters().search_rules.authorized_indexes(), + )? .into_iter() - .map(TaskView::from) + .map(|t| TaskView::from_task(&t)) .collect(); // If we were able to fetch the number +1 tasks we asked // it means that there is more to come. - let next = if tasks_results.len() == limit { + let next = if tasks_results.len() == limit as usize { tasks_results.pop().map(|t| t.uid) } else { None @@ -161,43 +652,369 @@ async fn get_tasks( let from = tasks_results.first().map(|t| t.uid); - let tasks = TaskListView { - results: tasks_results, - limit: limit.saturating_sub(1), - from, - next, - }; - + let tasks = AllTasks { results: tasks_results, limit: limit.saturating_sub(1), from, next }; Ok(HttpResponse::Ok().json(tasks)) } async fn get_task( - meilisearch: GuardedData, MeiliSearch>, - task_id: web::Path, + index_scheduler: GuardedData, Data>, + task_uid: web::Path, req: HttpRequest, analytics: web::Data, ) -> Result { - analytics.publish( - "Tasks Seen".to_string(), - json!({ "per_task_uid": true }), - Some(&req), - ); + let task_uid_string = task_uid.into_inner(); - let search_rules = &meilisearch.filters().search_rules; - let filters = if search_rules.is_index_authorized("*") { - None - } else { - let mut filters = TaskFilter::default(); - for (index, _policy) in search_rules.clone() { - filters.filter_index(index); + let task_uid: TaskId = match task_uid_string.parse() { + Ok(id) => id, + Err(_e) => { + return Err(index_scheduler::Error::InvalidTaskUids { task_uid: task_uid_string }.into()) } - Some(filters) }; - let task: TaskView = meilisearch - .get_task(task_id.into_inner(), filters) - .await? - .into(); + analytics.publish("Tasks Seen".to_string(), json!({ "per_task_uid": true }), Some(&req)); - Ok(HttpResponse::Ok().json(task)) + let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() }; + + if let Some(task) = index_scheduler + .get_tasks_from_authorized_indexes( + query, + index_scheduler.filters().search_rules.authorized_indexes(), + )? + .first() + { + let task_view = TaskView::from_task(task); + Ok(HttpResponse::Ok().json(task_view)) + } else { + Err(index_scheduler::Error::TaskNotFound(task_uid).into()) + } +} + +pub(crate) mod date_deserializer { + use meilisearch_types::error::ResponseError; + use time::format_description::well_known::Rfc3339; + use time::macros::format_description; + use time::{Date, Duration, OffsetDateTime, Time}; + + pub enum DeserializeDateOption { + Before, + After, + } + + pub fn deserialize_date( + field_name: &str, + value: &str, + option: DeserializeDateOption, + ) -> std::result::Result { + // We can't parse using time's rfc3339 format, since then we won't know what part of the + // datetime was not explicitly specified, and thus we won't be able to increment it to the + // next step. 
+ if let Ok(datetime) = OffsetDateTime::parse(value, &Rfc3339) { + // fully specified up to the second + // we assume that the subseconds are 0 if not specified, and we don't increment to the next second + Ok(datetime) + } else if let Ok(datetime) = Date::parse( + value, + format_description!("[year repr:full base:calendar]-[month repr:numerical]-[day]"), + ) { + let datetime = datetime.with_time(Time::MIDNIGHT).assume_utc(); + // add one day since the time was not specified + match option { + DeserializeDateOption::Before => Ok(datetime), + DeserializeDateOption::After => { + let datetime = datetime.checked_add(Duration::days(1)).unwrap_or(datetime); + Ok(datetime) + } + } + } else { + Err(index_scheduler::Error::InvalidTaskDate { + field: field_name.to_string(), + date: value.to_string(), + } + .into()) + } + } +} + +#[cfg(test)] +mod tests { + use meili_snap::snapshot; + + use crate::routes::tasks::{TaskDeletionOrCancelationQueryRaw, TasksFilterQueryRaw}; + + #[test] + fn deserialize_task_filter_dates() { + { + let json = r#" { + "afterEnqueuedAt": "2021-12-03", + "beforeEnqueuedAt": "2021-12-03", + "afterStartedAt": "2021-12-03", + "beforeStartedAt": "2021-12-03", + "afterFinishedAt": "2021-12-03", + "beforeFinishedAt": "2021-12-03" + } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"2021-12-04 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.before_enqueued_at.unwrap()), @"2021-12-03 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.after_started_at.unwrap()), @"2021-12-04 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.before_started_at.unwrap()), @"2021-12-03 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.after_finished_at.unwrap()), @"2021-12-04 0:00:00.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.before_finished_at.unwrap()), @"2021-12-03 0:00:00.0 +00:00:00"); + } + { + let json = r#" { "afterEnqueuedAt": "2021-12-03T23:45:23Z", "beforeEnqueuedAt": "2021-12-03T23:45:23Z" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"2021-12-03 23:45:23.0 +00:00:00"); + snapshot!(format!("{:?}", query.dates.before_enqueued_at.unwrap()), @"2021-12-03 23:45:23.0 +00:00:00"); + } + { + let json = r#" { "afterEnqueuedAt": "1997-11-12T09:55:06-06:20" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"1997-11-12 9:55:06.0 -06:20:00"); + } + { + let json = r#" { "afterEnqueuedAt": "1997-11-12T09:55:06+00:00" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"1997-11-12 9:55:06.0 +00:00:00"); + } + { + let json = r#" { "afterEnqueuedAt": "1997-11-12T09:55:06.200000300Z" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.dates.after_enqueued_at.unwrap()), @"1997-11-12 9:55:06.2000003 +00:00:00"); + } + { + let json = r#" { "afterFinishedAt": "2021" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task `afterFinishedAt` `2021` is invalid. 
It should follow the YYYY-MM-DD or RFC 3339 date-time format."); + } + { + let json = r#" { "beforeFinishedAt": "2021" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task `beforeFinishedAt` `2021` is invalid. It should follow the YYYY-MM-DD or RFC 3339 date-time format."); + } + { + let json = r#" { "afterEnqueuedAt": "2021-12" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task `afterEnqueuedAt` `2021-12` is invalid. It should follow the YYYY-MM-DD or RFC 3339 date-time format."); + } + + { + let json = r#" { "beforeEnqueuedAt": "2021-12-03T23" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task `beforeEnqueuedAt` `2021-12-03T23` is invalid. It should follow the YYYY-MM-DD or RFC 3339 date-time format."); + } + { + let json = r#" { "afterStartedAt": "2021-12-03T23:45" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task `afterStartedAt` `2021-12-03T23:45` is invalid. It should follow the YYYY-MM-DD or RFC 3339 date-time format."); + + let json = r#" { "beforeStartedAt": "2021-12-03T23:45" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task `beforeStartedAt` `2021-12-03T23:45` is invalid. It should follow the YYYY-MM-DD or RFC 3339 date-time format."); + } + } + + #[test] + fn deserialize_task_filter_uids() { + { + let json = r#" { "uids": "78,1,12,73" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.common.uids.unwrap()), @"[78, 1, 12, 73]"); + } + { + let json = r#" { "uids": "1" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.common.uids.unwrap()), @"[1]"); + } + { + let json = r#" { "uids": "78,hello,world" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task uid `hello` is invalid. It should only contain numeric characters."); + } + { + let json = r#" { "uids": "cat" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task uid `cat` is invalid. It should only contain numeric characters."); + } + } + + #[test] + fn deserialize_task_filter_status() { + { + let json = r#" { "statuses": "succeeded,failed,enqueued,processing,canceled" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.common.statuses.unwrap()), @"[Succeeded, Failed, Enqueued, Processing, Canceled]"); + } + { + let json = r#" { "statuses": "enqueued" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.common.statuses.unwrap()), @"[Enqueued]"); + } + { + let json = r#" { "statuses": "finished" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task status `finished` is invalid. 
Available task statuses are `enqueued`, `processing`, `succeeded`, `failed`, `canceled`."); + } + } + #[test] + fn deserialize_task_filter_types() { + { + let json = r#" { "types": "documentAdditionOrUpdate,documentDeletion,settingsUpdate,indexCreation,indexDeletion,indexUpdate,indexSwap,taskCancelation,taskDeletion,dumpCreation,snapshotCreation" }"#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.common.types.unwrap()), @"[DocumentAdditionOrUpdate, DocumentDeletion, SettingsUpdate, IndexCreation, IndexDeletion, IndexUpdate, IndexSwap, TaskCancelation, TaskDeletion, DumpCreation, SnapshotCreation]"); + } + { + let json = r#" { "types": "settingsUpdate" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.common.types.unwrap()), @"[SettingsUpdate]"); + } + { + let json = r#" { "types": "createIndex" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"Task type `createIndex` is invalid. Available task types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`"); + } + } + #[test] + fn deserialize_task_filter_index_uids() { + { + let json = r#" { "indexUids": "toto,tata-78" }"#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.common.index_uids.unwrap()), @r###"["toto", "tata-78"]"###); + } + { + let json = r#" { "indexUids": "index_a" } "#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query.common.index_uids.unwrap()), @r###"["index_a"]"###); + } + { + let json = r#" { "indexUids": "1,hé" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"hé is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_)."); + } + { + let json = r#" { "indexUids": "hé" } "#; + let err = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap_err(); + snapshot!(format!("{err}"), @"hé is not a valid index uid. 
Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_)."); + } + } + + #[test] + fn deserialize_task_filter_general() { + { + let json = r#" { "from": 12, "limit": 15, "indexUids": "toto,tata-78", "statuses": "succeeded,enqueued", "afterEnqueuedAt": "2012-04-23", "uids": "1,2,3" }"#; + let query = + serde_json::from_str::(json).unwrap().validate().unwrap(); + snapshot!(format!("{:?}", query), @r###"TasksFilterQuery { limit: 15, from: Some(12), common: TaskCommonQuery { types: None, uids: Some([1, 2, 3]), canceled_by: None, statuses: Some([Succeeded, Enqueued]), index_uids: Some(["toto", "tata-78"]) }, dates: TaskDateQuery { after_enqueued_at: Some(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None } }"###); + } + { + // Stars should translate to `None` in the query + // Verify value of the default limit + let json = r#" { "indexUids": "*", "statuses": "succeeded,*", "afterEnqueuedAt": "2012-04-23", "uids": "1,2,3" }"#; + let query = + serde_json::from_str::(json).unwrap().validate().unwrap(); + snapshot!(format!("{:?}", query), @"TasksFilterQuery { limit: 20, from: None, common: TaskCommonQuery { types: None, uids: Some([1, 2, 3]), canceled_by: None, statuses: None, index_uids: None }, dates: TaskDateQuery { after_enqueued_at: Some(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None } }"); + } + { + // Stars should also translate to `None` in task deletion/cancelation queries + let json = r#" { "indexUids": "*", "statuses": "succeeded,*", "afterEnqueuedAt": "2012-04-23", "uids": "1,2,3" }"#; + let query = serde_json::from_str::(json) + .unwrap() + .validate() + .unwrap(); + snapshot!(format!("{:?}", query), @"TaskDeletionOrCancelationQuery { common: TaskCommonQuery { types: None, uids: Some([1, 2, 3]), canceled_by: None, statuses: None, index_uids: None }, dates: TaskDateQuery { after_enqueued_at: Some(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None } }"); + } + { + // Stars in uids not allowed + let json = r#" { "uids": "*" }"#; + let err = + serde_json::from_str::(json).unwrap().validate().unwrap_err(); + snapshot!(format!("{err}"), @"Task uid `*` is invalid. 
It should only contain numeric characters."); + } + { + // From not allowed in task deletion/cancelation queries + let json = r#" { "from": 12 }"#; + let err = serde_json::from_str::(json).unwrap_err(); + snapshot!(format!("{err}"), @"unknown field `from` at line 1 column 15"); + } + { + // Limit not allowed in task deletion/cancelation queries + let json = r#" { "limit": 12 }"#; + let err = serde_json::from_str::(json).unwrap_err(); + snapshot!(format!("{err}"), @"unknown field `limit` at line 1 column 16"); + } + } } diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-http/src/search.rs similarity index 61% rename from meilisearch-lib/src/index/search.rs rename to meilisearch-http/src/search.rs index 57171d529..7310e7914 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-http/src/search.rs @@ -4,40 +4,38 @@ use std::str::FromStr; use std::time::Instant; use either::Either; +use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; +use meilisearch_types::{milli, Document}; use milli::tokenizer::TokenizerBuilder; use milli::{ - AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError, - TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, + AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder, + SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, }; use regex::Regex; use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; -use crate::index::error::FacetError; +use crate::error::MeilisearchHttpError; -use super::error::{IndexError, Result}; -use super::index::Index; - -pub type Document = serde_json::Map; type MatchesPosition = BTreeMap>; +pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0; pub const DEFAULT_SEARCH_LIMIT: fn() -> usize = || 20; pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10; pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "".to_string(); -/// The maximimum number of results that the engine -/// will be able to return in one search call. 
-pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; - #[derive(Deserialize, Debug, Clone, PartialEq, Eq)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct SearchQuery { pub q: Option, - pub offset: Option, + #[serde(default = "DEFAULT_SEARCH_OFFSET")] + pub offset: usize, #[serde(default = "DEFAULT_SEARCH_LIMIT")] pub limit: usize, + pub page: Option, + pub hits_per_page: Option, pub attributes_to_retrieve: Option>, pub attributes_to_crop: Option>, #[serde(default = "DEFAULT_CROP_LENGTH")] @@ -59,6 +57,12 @@ pub struct SearchQuery { pub matching_strategy: MatchingStrategy, } +impl SearchQuery { + pub fn is_finite_pagination(&self) -> bool { + self.page.or(self.hits_per_page).is_some() + } +} + #[derive(Deserialize, Debug, Clone, PartialEq, Eq)] #[serde(rename_all = "camelCase")] pub enum MatchingStrategy { @@ -83,7 +87,7 @@ impl From for TermsMatchingStrategy { } } -#[derive(Debug, Clone, Serialize, PartialEq)] +#[derive(Debug, Clone, Serialize, PartialEq, Eq)] pub struct SearchHit { #[serde(flatten)] pub document: Document, @@ -93,196 +97,226 @@ pub struct SearchHit { pub matches_position: Option, } -#[derive(Serialize, Debug, Clone, PartialEq)] +#[derive(Serialize, Debug, Clone, PartialEq, Eq)] #[serde(rename_all = "camelCase")] pub struct SearchResult { pub hits: Vec, - pub estimated_total_hits: u64, pub query: String, - pub limit: usize, - pub offset: usize, pub processing_time_ms: u128, + #[serde(flatten)] + pub hits_info: HitsInfo, #[serde(skip_serializing_if = "Option::is_none")] pub facet_distribution: Option>>, } -impl Index { - pub fn perform_search(&self, query: SearchQuery) -> Result { - let before_search = Instant::now(); - let rtxn = self.read_txn()?; +#[derive(Serialize, Debug, Clone, PartialEq, Eq)] +#[serde(untagged)] +pub enum HitsInfo { + #[serde(rename_all = "camelCase")] + Pagination { hits_per_page: usize, page: usize, total_pages: usize, total_hits: usize }, + #[serde(rename_all = "camelCase")] + OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, +} - let mut search = self.search(&rtxn); +pub fn perform_search( + index: &Index, + query: SearchQuery, +) -> Result { + let before_search = Instant::now(); + let rtxn = index.read_txn()?; - if let Some(ref query) = query.q { - search.query(query); - } + let mut search = index.search(&rtxn); - search.terms_matching_strategy(query.matching_strategy.into()); - - let max_total_hits = self - .pagination_max_total_hits(&rtxn)? - .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); - - // Make sure that a user can't get more documents than the hard limit, - // we align that on the offset too. - let offset = min(query.offset.unwrap_or(0), max_total_hits); - let limit = min(query.limit, max_total_hits.saturating_sub(offset)); - - search.offset(offset); - search.limit(limit); - - if let Some(ref filter) = query.filter { - if let Some(facets) = parse_filter(filter)? { - search.filter(facets); - } - } - - if let Some(ref sort) = query.sort { - let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() { - Ok(sorts) => sorts, - Err(asc_desc_error) => { - return Err(IndexError::Milli(SortError::from(asc_desc_error).into())) - } - }; - - search.sort_criteria(sort); - } - - let milli::SearchResult { - documents_ids, - matching_words, - candidates, - .. - } = search.execute()?; - - let fields_ids_map = self.fields_ids_map(&rtxn).unwrap(); - - let displayed_ids = self - .displayed_fields_ids(&rtxn)? 
- .map(|fields| fields.into_iter().collect::>()) - .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); - - let fids = |attrs: &BTreeSet| { - let mut ids = BTreeSet::new(); - for attr in attrs { - if attr == "*" { - ids = displayed_ids.clone(); - break; - } - - if let Some(id) = fields_ids_map.id(attr) { - ids.insert(id); - } - } - ids - }; - - // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), - // but these attributes must be also be present - // - in the fields_ids_map - // - in the the displayed attributes - let to_retrieve_ids: BTreeSet<_> = query - .attributes_to_retrieve - .as_ref() - .map(fids) - .unwrap_or_else(|| displayed_ids.clone()) - .intersection(&displayed_ids) - .cloned() - .collect(); - - let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default(); - - let attr_to_crop = query.attributes_to_crop.unwrap_or_default(); - - // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted` - // These attributes are: - // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) - // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped - // But these attributes must be also present in displayed attributes - let formatted_options = compute_formatted_options( - &attr_to_highlight, - &attr_to_crop, - query.crop_length, - &to_retrieve_ids, - &fields_ids_map, - &displayed_ids, - ); - - let tokenizer = TokenizerBuilder::default().build(); - - let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer); - formatter_builder.crop_marker(query.crop_marker); - formatter_builder.highlight_prefix(query.highlight_pre_tag); - formatter_builder.highlight_suffix(query.highlight_post_tag); - - let mut documents = Vec::new(); - - let documents_iter = self.documents(&rtxn, documents_ids)?; - - for (_id, obkv) in documents_iter { - // First generate a document with all the displayed fields - let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; - - // select the attributes to retrieve - let attributes_to_retrieve = to_retrieve_ids - .iter() - .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); - let mut document = - permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); - - let (matches_position, formatted) = format_fields( - &displayed_document, - &fields_ids_map, - &formatter_builder, - &formatted_options, - query.show_matches_position, - &displayed_ids, - )?; - - if let Some(sort) = query.sort.as_ref() { - insert_geo_distance(sort, &mut document); - } - - let hit = SearchHit { - document, - formatted, - matches_position, - }; - documents.push(hit); - } - - let estimated_total_hits = candidates.len(); - - let facet_distribution = match query.facets { - Some(ref fields) => { - let mut facet_distribution = self.facets_distribution(&rtxn); - - let max_values_by_facet = self - .max_values_per_facet(&rtxn)? 
- .unwrap_or(DEFAULT_VALUES_PER_FACET); - facet_distribution.max_values_per_facet(max_values_by_facet); - - if fields.iter().all(|f| f != "*") { - facet_distribution.facets(fields); - } - let distribution = facet_distribution.candidates(candidates).execute()?; - - Some(distribution) - } - None => None, - }; - - let result = SearchResult { - hits: documents, - estimated_total_hits, - query: query.q.clone().unwrap_or_default(), - limit: query.limit, - offset: query.offset.unwrap_or_default(), - processing_time_ms: before_search.elapsed().as_millis(), - facet_distribution, - }; - Ok(result) + if let Some(ref query) = query.q { + search.query(query); } + + let is_finite_pagination = query.is_finite_pagination(); + search.terms_matching_strategy(query.matching_strategy.into()); + + let max_total_hits = index + .pagination_max_total_hits(&rtxn) + .map_err(milli::Error::from)? + .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); + + search.exhaustive_number_hits(is_finite_pagination); + + // compute the offset on the limit depending on the pagination mode. + let (offset, limit) = if is_finite_pagination { + let limit = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); + let page = query.page.unwrap_or(1); + + // page 0 gives a limit of 0 forcing Meilisearch to return no document. + page.checked_sub(1).map_or((0, 0), |p| (limit * p, limit)) + } else { + (query.offset, query.limit) + }; + + // Make sure that a user can't get more documents than the hard limit, + // we align that on the offset too. + let offset = min(offset, max_total_hits); + let limit = min(limit, max_total_hits.saturating_sub(offset)); + + search.offset(offset); + search.limit(limit); + + if let Some(ref filter) = query.filter { + if let Some(facets) = parse_filter(filter)? { + search.filter(facets); + } + } + + if let Some(ref sort) = query.sort { + let sort = match sort.iter().map(|s| AscDesc::from_str(s)).collect() { + Ok(sorts) => sorts, + Err(asc_desc_error) => { + return Err(milli::Error::from(SortError::from(asc_desc_error)).into()) + } + }; + + search.sort_criteria(sort); + } + + let milli::SearchResult { documents_ids, matching_words, candidates, .. } = search.execute()?; + + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + + let displayed_ids = index + .displayed_fields_ids(&rtxn)? 
+ .map(|fields| fields.into_iter().collect::>()) + .unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); + + let fids = |attrs: &BTreeSet| { + let mut ids = BTreeSet::new(); + for attr in attrs { + if attr == "*" { + ids = displayed_ids.clone(); + break; + } + + if let Some(id) = fields_ids_map.id(attr) { + ids.insert(id); + } + } + ids + }; + + // The attributes to retrieve are the ones explicitly marked as to retrieve (all by default), + // but these attributes must be also be present + // - in the fields_ids_map + // - in the the displayed attributes + let to_retrieve_ids: BTreeSet<_> = query + .attributes_to_retrieve + .as_ref() + .map(fids) + .unwrap_or_else(|| displayed_ids.clone()) + .intersection(&displayed_ids) + .cloned() + .collect(); + + let attr_to_highlight = query.attributes_to_highlight.unwrap_or_default(); + + let attr_to_crop = query.attributes_to_crop.unwrap_or_default(); + + // Attributes in `formatted_options` correspond to the attributes that will be in `_formatted` + // These attributes are: + // - the attributes asked to be highlighted or cropped (with `attributesToCrop` or `attributesToHighlight`) + // - the attributes asked to be retrieved: these attributes will not be highlighted/cropped + // But these attributes must be also present in displayed attributes + let formatted_options = compute_formatted_options( + &attr_to_highlight, + &attr_to_crop, + query.crop_length, + &to_retrieve_ids, + &fields_ids_map, + &displayed_ids, + ); + + let tokenizer = TokenizerBuilder::default().build(); + + let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer); + formatter_builder.crop_marker(query.crop_marker); + formatter_builder.highlight_prefix(query.highlight_pre_tag); + formatter_builder.highlight_suffix(query.highlight_post_tag); + + let mut documents = Vec::new(); + + let documents_iter = index.documents(&rtxn, documents_ids)?; + + for (_id, obkv) in documents_iter { + // First generate a document with all the displayed fields + let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; + + // select the attributes to retrieve + let attributes_to_retrieve = to_retrieve_ids + .iter() + .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); + let mut document = + permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); + + let (matches_position, formatted) = format_fields( + &displayed_document, + &fields_ids_map, + &formatter_builder, + &formatted_options, + query.show_matches_position, + &displayed_ids, + )?; + + if let Some(sort) = query.sort.as_ref() { + insert_geo_distance(sort, &mut document); + } + + let hit = SearchHit { document, formatted, matches_position }; + documents.push(hit); + } + + let number_of_hits = min(candidates.len() as usize, max_total_hits); + let hits_info = if is_finite_pagination { + let hits_per_page = query.hits_per_page.unwrap_or_else(DEFAULT_SEARCH_LIMIT); + // If hit_per_page is 0, then pages can't be computed and so we respond 0. 
+ let total_pages = (number_of_hits + hits_per_page.saturating_sub(1)) + .checked_div(hits_per_page) + .unwrap_or(0); + + HitsInfo::Pagination { + hits_per_page, + page: query.page.unwrap_or(1), + total_pages, + total_hits: number_of_hits, + } + } else { + HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits } + }; + + let facet_distribution = match query.facets { + Some(ref fields) => { + let mut facet_distribution = index.facets_distribution(&rtxn); + + let max_values_by_facet = index + .max_values_per_facet(&rtxn) + .map_err(milli::Error::from)? + .unwrap_or(DEFAULT_VALUES_PER_FACET); + facet_distribution.max_values_per_facet(max_values_by_facet); + + if fields.iter().all(|f| f != "*") { + facet_distribution.facets(fields); + } + let distribution = facet_distribution.candidates(candidates).execute()?; + + Some(distribution) + } + None => None, + }; + + let result = SearchResult { + hits: documents, + hits_info, + query: query.q.clone().unwrap_or_default(), + processing_time_ms: before_search.elapsed().as_millis(), + facet_distribution, + }; + Ok(result) } fn insert_geo_distance(sorts: &[String], document: &mut Document) { @@ -292,10 +326,7 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) { }; if let Some(capture_group) = sorts.iter().find_map(|sort| GEO_REGEX.captures(sort)) { // TODO: TAMO: milli encountered an internal error, what do we want to do? - let base = [ - capture_group[1].parse().unwrap(), - capture_group[2].parse().unwrap(), - ]; + let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()]; let geo_point = &document.get("_geo").unwrap_or(&json!(null)); if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) { let distance = milli::distance_between_two_points(&base, &[lat, lng]); @@ -344,10 +375,7 @@ fn add_highlight_to_formatted_options( displayed_ids: &BTreeSet, ) { for attr in attr_to_highlight { - let new_format = FormatOptions { - highlight: true, - crop: None, - }; + let new_format = FormatOptions { highlight: true, crop: None }; if attr == "*" { for id in displayed_ids { @@ -386,10 +414,7 @@ fn add_crop_to_formatted_options( formatted_options .entry(*id) .and_modify(|f| f.crop = Some(attr_len)) - .or_insert(FormatOptions { - highlight: false, - crop: Some(attr_len), - }); + .or_insert(FormatOptions { highlight: false, crop: Some(attr_len) }); } } @@ -398,10 +423,7 @@ fn add_crop_to_formatted_options( formatted_options .entry(id) .and_modify(|f| f.crop = Some(attr_len)) - .or_insert(FormatOptions { - highlight: false, - crop: Some(attr_len), - }); + .or_insert(FormatOptions { highlight: false, crop: Some(attr_len) }); } } } @@ -412,10 +434,7 @@ fn add_non_formatted_ids_to_formatted_options( to_retrieve_ids: &BTreeSet, ) { for id in to_retrieve_ids { - formatted_options.entry(*id).or_insert(FormatOptions { - highlight: false, - crop: None, - }); + formatted_options.entry(*id).or_insert(FormatOptions { highlight: false, crop: None }); } } @@ -423,16 +442,13 @@ fn make_document( displayed_attributes: &BTreeSet, field_ids_map: &FieldsIdsMap, obkv: obkv::KvReaderU16, -) -> Result { +) -> Result { let mut document = serde_json::Map::new(); // recreate the original json for (key, value) in obkv.iter() { let value = serde_json::from_slice(value)?; - let key = field_ids_map - .name(key) - .expect("Missing field name") - .to_string(); + let key = field_ids_map.name(key).expect("Missing field name").to_string(); document.insert(key, value); } @@ -453,14 +469,13 @@ fn 
format_fields<'a, A: AsRef<[u8]>>( formatted_options: &BTreeMap, compute_matches: bool, displayable_ids: &BTreeSet, -) -> Result<(Option, Document)> { +) -> Result<(Option, Document), MeilisearchHttpError> { let mut matches_position = compute_matches.then(BTreeMap::new); let mut document = document.clone(); // select the attributes to retrieve - let displayable_names = displayable_ids - .iter() - .map(|&fid| field_ids_map.name(fid).expect("Missing field name")); + let displayable_names = + displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name")); permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| { // To get the formatting option of each key we need to see all the rules that applies // to the value and merge them together. eg. If a user said he wanted to highlight `doggo` @@ -476,13 +491,7 @@ fn format_fields<'a, A: AsRef<[u8]>>( .reduce(|acc, option| acc.merge(option)); let mut infos = Vec::new(); - *value = format_value( - std::mem::take(value), - builder, - format, - &mut infos, - compute_matches, - ); + *value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches); if let Some(matches) = matches_position.as_mut() { if !infos.is_empty() { @@ -582,18 +591,18 @@ fn format_value<'a, A: AsRef<[u8]>>( } } -fn parse_filter(facets: &Value) -> Result> { +fn parse_filter(facets: &Value) -> Result, MeilisearchHttpError> { match facets { Value::String(expr) => { let condition = Filter::from_str(expr)?; Ok(condition) } Value::Array(arr) => parse_filter_array(arr), - v => Err(FacetError::InvalidExpression(&["Array"], v.clone()).into()), + v => Err(MeilisearchHttpError::InvalidExpression(&["Array"], v.clone())), } } -fn parse_filter_array(arr: &[Value]) -> Result> { +fn parse_filter_array(arr: &[Value]) -> Result, MeilisearchHttpError> { let mut ands = Vec::new(); for value in arr { match value { @@ -604,16 +613,20 @@ fn parse_filter_array(arr: &[Value]) -> Result> { match value { Value::String(s) => ors.push(s.as_str()), v => { - return Err(FacetError::InvalidExpression(&["String"], v.clone()).into()) + return Err(MeilisearchHttpError::InvalidExpression( + &["String"], + v.clone(), + )) } } } ands.push(Either::Left(ors)); } v => { - return Err( - FacetError::InvalidExpression(&["String", "[String]"], v.clone()).into(), - ) + return Err(MeilisearchHttpError::InvalidExpression( + &["String", "[String]"], + v.clone(), + )) } } } diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs deleted file mode 100644 index fe23720aa..000000000 --- a/meilisearch-http/src/task.rs +++ /dev/null @@ -1,434 +0,0 @@ -use std::error::Error; -use std::fmt::{self, Write}; -use std::str::FromStr; -use std::write; - -use meilisearch_lib::index::{Settings, Unchecked}; -use meilisearch_lib::tasks::task::{ - DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult, -}; -use meilisearch_types::error::ResponseError; -use serde::{Deserialize, Serialize, Serializer}; -use time::{Duration, OffsetDateTime}; - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub enum TaskType { - IndexCreation, - IndexUpdate, - IndexDeletion, - DocumentAdditionOrUpdate, - DocumentDeletion, - SettingsUpdate, - DumpCreation, -} - -impl From for TaskType { - fn from(other: TaskContent) -> Self { - match other { - TaskContent::IndexCreation { .. } => TaskType::IndexCreation, - TaskContent::IndexUpdate { .. } => TaskType::IndexUpdate, - TaskContent::IndexDeletion { .. 
} => TaskType::IndexDeletion, - TaskContent::DocumentAddition { .. } => TaskType::DocumentAdditionOrUpdate, - TaskContent::DocumentDeletion { .. } => TaskType::DocumentDeletion, - TaskContent::SettingsUpdate { .. } => TaskType::SettingsUpdate, - TaskContent::Dump { .. } => TaskType::DumpCreation, - } - } -} - -#[derive(Debug)] -pub struct TaskTypeError { - invalid_type: String, -} - -impl fmt::Display for TaskTypeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "invalid task type `{}`, expecting one of: \ - indexCreation, indexUpdate, indexDeletion, documentAdditionOrUpdate, \ - documentDeletion, settingsUpdate, dumpCreation", - self.invalid_type - ) - } -} - -impl Error for TaskTypeError {} - -impl FromStr for TaskType { - type Err = TaskTypeError; - - fn from_str(type_: &str) -> Result { - if type_.eq_ignore_ascii_case("indexCreation") { - Ok(TaskType::IndexCreation) - } else if type_.eq_ignore_ascii_case("indexUpdate") { - Ok(TaskType::IndexUpdate) - } else if type_.eq_ignore_ascii_case("indexDeletion") { - Ok(TaskType::IndexDeletion) - } else if type_.eq_ignore_ascii_case("documentAdditionOrUpdate") { - Ok(TaskType::DocumentAdditionOrUpdate) - } else if type_.eq_ignore_ascii_case("documentDeletion") { - Ok(TaskType::DocumentDeletion) - } else if type_.eq_ignore_ascii_case("settingsUpdate") { - Ok(TaskType::SettingsUpdate) - } else if type_.eq_ignore_ascii_case("dumpCreation") { - Ok(TaskType::DumpCreation) - } else { - Err(TaskTypeError { - invalid_type: type_.to_string(), - }) - } - } -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub enum TaskStatus { - Enqueued, - Processing, - Succeeded, - Failed, -} - -#[derive(Debug)] -pub struct TaskStatusError { - invalid_status: String, -} - -impl fmt::Display for TaskStatusError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "invalid task status `{}`, expecting one of: \ - enqueued, processing, succeeded, or failed", - self.invalid_status, - ) - } -} - -impl Error for TaskStatusError {} - -impl FromStr for TaskStatus { - type Err = TaskStatusError; - - fn from_str(status: &str) -> Result { - if status.eq_ignore_ascii_case("enqueued") { - Ok(TaskStatus::Enqueued) - } else if status.eq_ignore_ascii_case("processing") { - Ok(TaskStatus::Processing) - } else if status.eq_ignore_ascii_case("succeeded") { - Ok(TaskStatus::Succeeded) - } else if status.eq_ignore_ascii_case("failed") { - Ok(TaskStatus::Failed) - } else { - Err(TaskStatusError { - invalid_status: status.to_string(), - }) - } - } -} - -#[derive(Debug, Serialize)] -#[serde(untagged)] -#[allow(clippy::large_enum_variant)] -enum TaskDetails { - #[serde(rename_all = "camelCase")] - DocumentAddition { - received_documents: usize, - indexed_documents: Option, - }, - #[serde(rename_all = "camelCase")] - Settings { - #[serde(flatten)] - settings: Settings, - }, - #[serde(rename_all = "camelCase")] - IndexInfo { primary_key: Option }, - #[serde(rename_all = "camelCase")] - DocumentDeletion { - received_document_ids: usize, - deleted_documents: Option, - }, - #[serde(rename_all = "camelCase")] - ClearAll { deleted_documents: Option }, - #[serde(rename_all = "camelCase")] - Dump { dump_uid: String }, -} - -/// Serialize a `time::Duration` as a best effort ISO 8601 while waiting for -/// https://github.com/time-rs/time/issues/378. -/// This code is a port of the old code of time that was removed in 0.2. 
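// A simplified sketch of what the best-effort ISO 8601 serializer defined just
// below (removed from `task.rs` in this diff) emits: days are split out first,
// the remainder is written as seconds, and negative durations fall back to
// `null`. The millisecond/microsecond special cases are collapsed into plain
// nanoseconds here, and the function name is illustrative only.
fn duration_to_iso8601_sketch(duration: time::Duration) -> Option<String> {
    if duration.is_negative() {
        return None; // the real helper serializes this as `null`
    }
    let day_secs = time::Duration::DAY.whole_seconds();
    let days = duration.whole_seconds() / day_secs;
    let secs = duration.whole_seconds() - days * day_secs;
    let nanos = duration.subsec_nanoseconds();
    let mut out = String::from("P");
    if days != 0 {
        out.push_str(&format!("{}D", days));
    }
    if secs != 0 || nanos != 0 || days == 0 {
        if nanos == 0 {
            out.push_str(&format!("T{}S", secs));
        } else {
            out.push_str(&format!("T{}.{:09}S", secs, nanos));
        }
    }
    Some(out)
}
// duration_to_iso8601_sketch(time::Duration::seconds(90_061))      == Some("P1DT3661S".into())
// duration_to_iso8601_sketch(time::Duration::milliseconds(1_500))  == Some("PT1.500000000S".into())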
-fn serialize_duration( - duration: &Option, - serializer: S, -) -> Result { - match duration { - Some(duration) => { - // technically speaking, negative duration is not valid ISO 8601 - if duration.is_negative() { - return serializer.serialize_none(); - } - - const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds(); - let secs = duration.whole_seconds(); - let days = secs / SECS_PER_DAY; - let secs = secs - days * SECS_PER_DAY; - let hasdate = days != 0; - let nanos = duration.subsec_nanoseconds(); - let hastime = (secs != 0 || nanos != 0) || !hasdate; - - // all the following unwrap can't fail - let mut res = String::new(); - write!(&mut res, "P").unwrap(); - - if hasdate { - write!(&mut res, "{}D", days).unwrap(); - } - - const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds(); - const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds(); - - if hastime { - if nanos == 0 { - write!(&mut res, "T{}S", secs).unwrap(); - } else if nanos % NANOS_PER_MILLI == 0 { - write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap(); - } else if nanos % NANOS_PER_MICRO == 0 { - write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap(); - } else { - write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap(); - } - } - - serializer.serialize_str(&res) - } - None => serializer.serialize_none(), - } -} - -#[derive(Debug, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct TaskView { - pub uid: TaskId, - index_uid: Option, - status: TaskStatus, - #[serde(rename = "type")] - task_type: TaskType, - #[serde(skip_serializing_if = "Option::is_none")] - details: Option, - #[serde(skip_serializing_if = "Option::is_none")] - error: Option, - #[serde(serialize_with = "serialize_duration")] - duration: Option, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - enqueued_at: OffsetDateTime, - #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] - started_at: Option, - #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] - finished_at: Option, -} - -impl From for TaskView { - fn from(task: Task) -> Self { - let index_uid = task.index_uid().map(String::from); - let Task { - id, - content, - events, - } = task; - - let (task_type, mut details) = match content { - TaskContent::DocumentAddition { - documents_count, .. - } => { - let details = TaskDetails::DocumentAddition { - received_documents: documents_count, - indexed_documents: None, - }; - - (TaskType::DocumentAdditionOrUpdate, Some(details)) - } - TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Ids(ids), - .. - } => ( - TaskType::DocumentDeletion, - Some(TaskDetails::DocumentDeletion { - received_document_ids: ids.len(), - deleted_documents: None, - }), - ), - TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Clear, - .. - } => ( - TaskType::DocumentDeletion, - Some(TaskDetails::ClearAll { - deleted_documents: None, - }), - ), - TaskContent::IndexDeletion { .. } => ( - TaskType::IndexDeletion, - Some(TaskDetails::ClearAll { - deleted_documents: None, - }), - ), - TaskContent::SettingsUpdate { settings, .. } => ( - TaskType::SettingsUpdate, - Some(TaskDetails::Settings { settings }), - ), - TaskContent::IndexCreation { primary_key, .. } => ( - TaskType::IndexCreation, - Some(TaskDetails::IndexInfo { primary_key }), - ), - TaskContent::IndexUpdate { primary_key, .. 
} => ( - TaskType::IndexUpdate, - Some(TaskDetails::IndexInfo { primary_key }), - ), - TaskContent::Dump { uid } => ( - TaskType::DumpCreation, - Some(TaskDetails::Dump { dump_uid: uid }), - ), - }; - - // An event always has at least one event: "Created" - let (status, error, finished_at) = match events.last().unwrap() { - TaskEvent::Created(_) => (TaskStatus::Enqueued, None, None), - TaskEvent::Batched { .. } => (TaskStatus::Enqueued, None, None), - TaskEvent::Processing(_) => (TaskStatus::Processing, None, None), - TaskEvent::Succeeded { timestamp, result } => { - match (result, &mut details) { - ( - TaskResult::DocumentAddition { - indexed_documents: num, - .. - }, - Some(TaskDetails::DocumentAddition { - ref mut indexed_documents, - .. - }), - ) => { - indexed_documents.replace(*num); - } - ( - TaskResult::DocumentDeletion { - deleted_documents: docs, - .. - }, - Some(TaskDetails::DocumentDeletion { - ref mut deleted_documents, - .. - }), - ) => { - deleted_documents.replace(*docs); - } - ( - TaskResult::ClearAll { - deleted_documents: docs, - }, - Some(TaskDetails::ClearAll { - ref mut deleted_documents, - }), - ) => { - deleted_documents.replace(*docs); - } - _ => (), - } - (TaskStatus::Succeeded, None, Some(*timestamp)) - } - TaskEvent::Failed { timestamp, error } => { - match details { - Some(TaskDetails::DocumentDeletion { - ref mut deleted_documents, - .. - }) => { - deleted_documents.replace(0); - } - Some(TaskDetails::ClearAll { - ref mut deleted_documents, - .. - }) => { - deleted_documents.replace(0); - } - Some(TaskDetails::DocumentAddition { - ref mut indexed_documents, - .. - }) => { - indexed_documents.replace(0); - } - _ => (), - } - (TaskStatus::Failed, Some(error.clone()), Some(*timestamp)) - } - }; - - let enqueued_at = match events.first() { - Some(TaskEvent::Created(ts)) => *ts, - _ => unreachable!("A task must always have a creation event."), - }; - - let started_at = events.iter().find_map(|e| match e { - TaskEvent::Processing(ts) => Some(*ts), - _ => None, - }); - - let duration = finished_at.zip(started_at).map(|(tf, ts)| (tf - ts)); - - Self { - uid: id, - index_uid, - status, - task_type, - details, - error, - duration, - enqueued_at, - started_at, - finished_at, - } - } -} - -#[derive(Debug, Serialize)] -pub struct TaskListView { - pub results: Vec, - pub limit: usize, - pub from: Option, - pub next: Option, -} - -#[derive(Debug, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct SummarizedTaskView { - task_uid: TaskId, - index_uid: Option, - status: TaskStatus, - #[serde(rename = "type")] - task_type: TaskType, - #[serde(serialize_with = "time::serde::rfc3339::serialize")] - enqueued_at: OffsetDateTime, -} - -impl From for SummarizedTaskView { - fn from(mut other: Task) -> Self { - let created_event = other - .events - .drain(..1) - .next() - .expect("Task must have an enqueued event."); - - let enqueued_at = match created_event { - TaskEvent::Created(ts) => ts, - _ => unreachable!("The first event of a task must always be 'Created'"), - }; - - Self { - task_uid: other.id, - index_uid: other.index_uid().map(String::from), - status: TaskStatus::Enqueued, - task_type: other.content.into(), - enqueued_at, - } - } -} diff --git a/meilisearch-http/tests/auth/api_keys.rs b/meilisearch-http/tests/auth/api_keys.rs index 7fdf2f129..052eb7509 100644 --- a/meilisearch-http/tests/auth/api_keys.rs +++ b/meilisearch-http/tests/auth/api_keys.rs @@ -1,7 +1,9 @@ -use crate::common::Server; +use std::{thread, time}; + use assert_json_diff::assert_json_include; use 
serde_json::{json, Value}; -use std::{thread, time}; + +use crate::common::Server; #[actix_rt::test] async fn add_valid_api_key() { @@ -375,7 +377,7 @@ async fn error_add_api_key_invalid_index_uids() { let (response, code) = server.add_api_key(content).await; let expected_response = json!({ - "message": r#"`indexes` field value `["invalid index # / \\name with spaces"]` is invalid. It should be an array of string representing index names."#, + "message": r#"`invalid index # / \name with spaces` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_)."#, "code": "invalid_api_key_indexes", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_api_key_indexes" @@ -1403,10 +1405,10 @@ async fn error_access_api_key_routes_no_master_key_set() { let mut server = Server::new().await; let expected_response = json!({ - "message": "The Authorization header is missing. It must use the bearer authorization method.", - "code": "missing_authorization_header", + "message": "Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.", + "code": "missing_master_key", "type": "auth", - "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + "link": "https://docs.meilisearch.com/errors#missing_master_key" }); let expected_code = 401; @@ -1432,12 +1434,13 @@ async fn error_access_api_key_routes_no_master_key_set() { server.use_api_key("MASTER_KEY"); - let expected_response = json!({"message": "The provided API key is invalid.", - "code": "invalid_api_key", + let expected_response = json!({ + "message": "Meilisearch is running without a master key. To access this API endpoint, you must have set a master key at launch.", + "code": "missing_master_key", "type": "auth", - "link": "https://docs.meilisearch.com/errors#invalid_api_key" + "link": "https://docs.meilisearch.com/errors#missing_master_key" }); - let expected_code = 403; + let expected_code = 401; let (response, code) = server.add_api_key(json!({})).await; diff --git a/meilisearch-http/tests/auth/authorization.rs b/meilisearch-http/tests/auth/authorization.rs index 824ea7b8e..fae6ee7e1 100644 --- a/meilisearch-http/tests/auth/authorization.rs +++ b/meilisearch-http/tests/auth/authorization.rs @@ -1,11 +1,13 @@ -use crate::common::Server; +use std::collections::{HashMap, HashSet}; + use ::time::format_description::well_known::Rfc3339; use maplit::{hashmap, hashset}; use once_cell::sync::Lazy; use serde_json::{json, Value}; -use std::collections::{HashMap, HashSet}; use time::{Duration, OffsetDateTime}; +use crate::common::Server; + pub static AUTHORIZATIONS: Lazy>> = Lazy::new(|| { let mut authorizations = hashmap! 
{ @@ -16,6 +18,7 @@ pub static AUTHORIZATIONS: Lazy hashset!{"documents.get", "documents.*", "*"}, ("DELETE", "/indexes/products/documents/0") => hashset!{"documents.delete", "documents.*", "*"}, ("GET", "/tasks") => hashset!{"tasks.get", "tasks.*", "*"}, + ("DELETE", "/tasks") => hashset!{"tasks.delete", "tasks.*", "*"}, ("GET", "/tasks?indexUid=products") => hashset!{"tasks.get", "tasks.*", "*"}, ("GET", "/tasks/0") => hashset!{"tasks.get", "tasks.*", "*"}, ("PATCH", "/indexes/products/") => hashset!{"indexes.update", "indexes.*", "*"}, @@ -23,6 +26,7 @@ pub static AUTHORIZATIONS: Lazy hashset!{"indexes.delete", "indexes.*", "*"}, ("POST", "/indexes") => hashset!{"indexes.create", "indexes.*", "*"}, ("GET", "/indexes") => hashset!{"indexes.get", "indexes.*", "*"}, + ("POST", "/swap-indexes") => hashset!{"indexes.swap", "indexes.*", "*"}, ("GET", "/indexes/products/settings") => hashset!{"settings.get", "settings.*", "*"}, ("GET", "/indexes/products/settings/displayed-attributes") => hashset!{"settings.get", "settings.*", "*"}, ("GET", "/indexes/products/settings/distinct-attribute") => hashset!{"settings.get", "settings.*", "*"}, @@ -55,21 +59,14 @@ pub static AUTHORIZATIONS: Lazy> = Lazy::new(|| { - AUTHORIZATIONS - .values() - .cloned() - .reduce(|l, r| l.union(&r).cloned().collect()) - .unwrap() + AUTHORIZATIONS.values().cloned().reduce(|l, r| l.union(&r).cloned().collect()).unwrap() }); static INVALID_RESPONSE: Lazy = Lazy::new(|| { @@ -81,7 +78,6 @@ static INVALID_RESPONSE: Lazy = Lazy::new(|| { }); #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_access_expired_key() { use std::{thread, time}; @@ -107,19 +103,12 @@ async fn error_access_expired_key() { for (method, route) in AUTHORIZATIONS.keys() { let (response, code) = server.dummy_request(method, route).await; - assert_eq!( - response, - INVALID_RESPONSE.clone(), - "on route: {:?} - {:?}", - method, - route - ); + assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); } } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_access_unauthorized_index() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); @@ -144,19 +133,12 @@ async fn error_access_unauthorized_index() { { let (response, code) = server.dummy_request(method, route).await; - assert_eq!( - response, - INVALID_RESPONSE.clone(), - "on route: {:?} - {:?}", - method, - route - ); + assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); } } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_access_unauthorized_action() { let mut server = Server::new_auth().await; @@ -178,19 +160,12 @@ async fn error_access_unauthorized_action() { server.use_api_key(key); let (response, code) = server.dummy_request(method, route).await; - assert_eq!( - response, - INVALID_RESPONSE.clone(), - "on route: {:?} - {:?}", - method, - route - ); + assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); } } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_master_key() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); @@ -199,19 +174,12 @@ async fn access_authorized_master_key() { for ((method, route), _) in AUTHORIZATIONS.iter() { let (response, code) = server.dummy_request(method, route).await; - 
assert_ne!( - response, - INVALID_RESPONSE.clone(), - "on route: {:?} - {:?}", - method, - route - ); + assert_ne!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_ne!(code, 403); } } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_restricted_index() { let mut server = Server::new_auth().await; for ((method, route), actions) in AUTHORIZATIONS.iter() { @@ -248,7 +216,6 @@ async fn access_authorized_restricted_index() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_no_index_restriction() { let mut server = Server::new_auth().await; @@ -286,7 +253,6 @@ async fn access_authorized_no_index_restriction() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_stats_restricted_index() { let mut server = Server::new_auth().await; server.use_admin_key("MASTER_KEY").await; @@ -299,7 +265,8 @@ async fn access_authorized_stats_restricted_index() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on `products` index only. let content = json!({ @@ -326,7 +293,6 @@ async fn access_authorized_stats_restricted_index() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn access_authorized_stats_no_index_restriction() { let mut server = Server::new_auth().await; server.use_admin_key("MASTER_KEY").await; @@ -339,7 +305,8 @@ async fn access_authorized_stats_no_index_restriction() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on all indexes. let content = json!({ @@ -366,7 +333,6 @@ async fn access_authorized_stats_no_index_restriction() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn list_authorized_indexes_restricted_index() { let mut server = Server::new_auth().await; server.use_admin_key("MASTER_KEY").await; @@ -379,7 +345,8 @@ async fn list_authorized_indexes_restricted_index() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on `products` index only. let content = json!({ @@ -407,7 +374,6 @@ async fn list_authorized_indexes_restricted_index() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn list_authorized_indexes_no_index_restriction() { let mut server = Server::new_auth().await; server.use_admin_key("MASTER_KEY").await; @@ -420,7 +386,8 @@ async fn list_authorized_indexes_no_index_restriction() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on all indexes. 
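// For reference, the `ALL_ACTIONS` set defined near the top of this file is
// just the union of every per-route action set, folded with `reduce`. A tiny
// standalone sketch of that fold; the route actions shown are illustrative.
fn union_of_action_sets(
    per_route: Vec<std::collections::HashSet<&'static str>>,
) -> std::collections::HashSet<&'static str> {
    per_route
        .into_iter()
        .reduce(|l, r| l.union(&r).cloned().collect())
        .unwrap_or_default()
}
// union_of_action_sets(vec![
//     maplit::hashset! {"documents.get", "documents.*", "*"},
//     maplit::hashset! {"tasks.get", "tasks.*", "*"},
// ]) yields {"documents.get", "documents.*", "tasks.get", "tasks.*", "*"}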
let content = json!({ @@ -460,7 +427,8 @@ async fn list_authorized_tasks_restricted_index() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on `products` index only. let content = json!({ @@ -500,7 +468,8 @@ async fn list_authorized_tasks_no_index_restriction() { let index = server.index("products"); let (response, code) = index.create(Some("product_id")).await; assert_eq!(202, code, "{:?}", &response); - index.wait_task(0).await; + let task_id = response["taskUid"].as_u64().unwrap(); + index.wait_task(task_id).await; // create key with access on all indexes. let content = json!({ diff --git a/meilisearch-http/tests/auth/mod.rs b/meilisearch-http/tests/auth/mod.rs index 03c24dd6d..dec02cf1f 100644 --- a/meilisearch-http/tests/auth/mod.rs +++ b/meilisearch-http/tests/auth/mod.rs @@ -3,11 +3,11 @@ mod authorization; mod payload; mod tenant_token; -use crate::common::Server; use actix_web::http::StatusCode; - use serde_json::{json, Value}; +use crate::common::Server; + impl Server { pub fn use_api_key(&mut self, api_key: impl AsRef) { self.service.api_key = Some(api_key.as_ref().to_string()); diff --git a/meilisearch-http/tests/auth/payload.rs b/meilisearch-http/tests/auth/payload.rs index 4437cd5f7..78eec3eb2 100644 --- a/meilisearch-http/tests/auth/payload.rs +++ b/meilisearch-http/tests/auth/payload.rs @@ -1,8 +1,8 @@ -use crate::common::Server; use actix_web::test; -use meilisearch_http::{analytics, create_app}; use serde_json::{json, Value}; +use crate::common::Server; + #[actix_rt::test] async fn error_api_key_bad_content_types() { let content = json!({ @@ -15,14 +15,7 @@ async fn error_api_key_bad_content_types() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -44,10 +37,7 @@ async fn error_api_key_bad_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); // patch let req = test::TestRequest::patch() @@ -69,10 +59,7 @@ async fn error_api_key_bad_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); } #[actix_rt::test] @@ -87,14 +74,7 @@ async fn error_api_key_empty_content_types() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -116,10 +96,7 @@ async fn error_api_key_empty_content_types() { ); 
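// The payload tests around here all assert the same error envelope. A tiny
// helper like this (illustrative, not in the diff) captures the pattern: every
// error response carries `code`, `type`, and a docs `link` derived from the code.
fn assert_error_envelope(response: &serde_json::Value, code: &str, kind: &str) {
    assert_eq!(response["code"], code);
    assert_eq!(response["type"], kind);
    assert_eq!(
        response["link"],
        format!("https://docs.meilisearch.com/errors#{}", code)
    );
}
// assert_error_envelope(&response, "invalid_content_type", "invalid_request");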
assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); // patch let req = test::TestRequest::patch() @@ -141,10 +118,7 @@ async fn error_api_key_empty_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); } #[actix_rt::test] @@ -159,14 +133,7 @@ async fn error_api_key_missing_content_types() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -187,10 +154,7 @@ async fn error_api_key_missing_content_types() { ); assert_eq!(response["code"], "missing_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#missing_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#missing_content_type"); // patch let req = test::TestRequest::patch() @@ -211,10 +175,7 @@ async fn error_api_key_missing_content_types() { ); assert_eq!(response["code"], "missing_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#missing_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#missing_content_type"); } #[actix_rt::test] @@ -223,14 +184,7 @@ async fn error_api_key_empty_payload() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -246,10 +200,7 @@ async fn error_api_key_empty_payload() { assert_eq!(status_code, 400); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); assert_eq!(response["message"], json!(r#"A json payload is missing."#)); // patch @@ -266,10 +217,7 @@ async fn error_api_key_empty_payload() { assert_eq!(status_code, 400); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); assert_eq!(response["message"], json!(r#"A json payload is missing."#)); } @@ -279,14 +227,7 @@ async fn error_api_key_malformed_payload() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); - let app = test::init_service(create_app!( - 
&server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // post let req = test::TestRequest::post() @@ -302,10 +243,7 @@ async fn error_api_key_malformed_payload() { assert_eq!(status_code, 400); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); assert_eq!( response["message"], json!( @@ -327,10 +265,7 @@ async fn error_api_key_malformed_payload() { assert_eq!(status_code, 400); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); assert_eq!( response["message"], json!( diff --git a/meilisearch-http/tests/auth/tenant_token.rs b/meilisearch-http/tests/auth/tenant_token.rs index 712c73993..fbf9d2b49 100644 --- a/meilisearch-http/tests/auth/tenant_token.rs +++ b/meilisearch-http/tests/auth/tenant_token.rs @@ -1,12 +1,13 @@ -use crate::common::Server; +use std::collections::HashMap; + use ::time::format_description::well_known::Rfc3339; use maplit::hashmap; use once_cell::sync::Lazy; use serde_json::{json, Value}; -use std::collections::HashMap; use time::{Duration, OffsetDateTime}; use super::authorization::{ALL_ACTIONS, AUTHORIZATIONS}; +use crate::common::Server; fn generate_tenant_token( parent_uid: impl AsRef, @@ -17,12 +18,8 @@ fn generate_tenant_token( let parent_uid = parent_uid.as_ref(); body.insert("apiKeyUid", json!(parent_uid)); - encode( - &Header::default(), - &body, - &EncodingKey::from_secret(parent_key.as_ref().as_bytes()), - ) - .unwrap() + encode(&Header::default(), &body, &EncodingKey::from_secret(parent_key.as_ref().as_bytes())) + .unwrap() } static DOCUMENTS: Lazy = Lazy::new(|| { @@ -206,7 +203,6 @@ macro_rules! compute_forbidden_search { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn search_authorized_simple_token() { let tenant_tokens = vec![ hashmap! { @@ -255,7 +251,6 @@ async fn search_authorized_simple_token() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn search_authorized_filter_token() { let tenant_tokens = vec![ hashmap! { @@ -309,7 +304,6 @@ async fn search_authorized_filter_token() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn filter_search_authorized_filter_token() { let tenant_tokens = vec![ hashmap! { @@ -363,7 +357,6 @@ async fn filter_search_authorized_filter_token() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_search_token_forbidden_parent_key() { let tenant_tokens = vec![ hashmap! 
{ @@ -396,7 +389,6 @@ async fn error_search_token_forbidden_parent_key() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_search_forbidden_token() { let tenant_tokens = vec![ // bad index @@ -451,7 +443,6 @@ async fn error_search_forbidden_token() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_access_forbidden_routes() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); @@ -486,7 +477,6 @@ async fn error_access_forbidden_routes() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_access_expired_parent_key() { use std::{thread, time}; let mut server = Server::new_auth().await; @@ -513,24 +503,19 @@ async fn error_access_expired_parent_key() { server.use_api_key(&web_token); // test search request while parent_key is not expired - let (response, code) = server - .dummy_request("POST", "/indexes/products/search") - .await; + let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; assert_ne!(response, INVALID_RESPONSE.clone()); assert_ne!(code, 403); // wait until the key is expired. thread::sleep(time::Duration::new(1, 0)); - let (response, code) = server - .dummy_request("POST", "/indexes/products/search") - .await; + let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn error_access_modified_token() { let mut server = Server::new_auth().await; server.use_api_key("MASTER_KEY"); @@ -556,9 +541,7 @@ async fn error_access_modified_token() { server.use_api_key(&web_token); // test search request while web_token is valid - let (response, code) = server - .dummy_request("POST", "/indexes/products/search") - .await; + let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; assert_ne!(response, INVALID_RESPONSE.clone()); assert_ne!(code, 403); @@ -576,9 +559,7 @@ async fn error_access_modified_token() { .join("."); server.use_api_key(&altered_token); - let (response, code) = server - .dummy_request("POST", "/indexes/products/search") - .await; + let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } diff --git a/meilisearch-http/tests/common/encoder.rs b/meilisearch-http/tests/common/encoder.rs new file mode 100644 index 000000000..b6a60f73e --- /dev/null +++ b/meilisearch-http/tests/common/encoder.rs @@ -0,0 +1,78 @@ +use std::io::{Read, Write}; + +use actix_http::header::TryIntoHeaderPair; +use bytes::Bytes; +use flate2::read::{GzDecoder, ZlibDecoder}; +use flate2::write::{GzEncoder, ZlibEncoder}; +use flate2::Compression; + +#[derive(Clone, Copy)] +pub enum Encoder { + Plain, + Gzip, + Deflate, + Brotli, +} + +impl Encoder { + pub fn encode(self: &Encoder, body: impl Into) -> impl Into { + match self { + Self::Gzip => { + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(&body.into()).expect("Failed to encode request body"); + encoder.finish().expect("Failed to encode request body") + } + Self::Deflate => { + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(&body.into()).expect("Failed to encode request body"); + encoder.finish().unwrap() + } + Self::Plain => Vec::from(body.into()), + Self::Brotli => { + let mut encoder = 
brotli::CompressorWriter::new(Vec::new(), 32 * 1024, 3, 22); + encoder.write_all(&body.into()).expect("Failed to encode request body"); + encoder.flush().expect("Failed to encode request body"); + encoder.into_inner() + } + } + } + + pub fn decode(self: &Encoder, bytes: impl Into) -> impl Into { + let mut buffer = Vec::new(); + let input = bytes.into(); + match self { + Self::Gzip => { + GzDecoder::new(input.as_ref()) + .read_to_end(&mut buffer) + .expect("Invalid gzip stream"); + } + Self::Deflate => { + ZlibDecoder::new(input.as_ref()) + .read_to_end(&mut buffer) + .expect("Invalid zlib stream"); + } + Self::Plain => { + buffer.write_all(input.as_ref()).expect("Unexpected memory copying issue"); + } + Self::Brotli => { + brotli::Decompressor::new(input.as_ref(), 4096) + .read_to_end(&mut buffer) + .expect("Invalid brotli stream"); + } + }; + buffer + } + + pub fn header(self: &Encoder) -> Option { + match self { + Self::Plain => None, + Self::Gzip => Some(("Content-Encoding", "gzip")), + Self::Deflate => Some(("Content-Encoding", "deflate")), + Self::Brotli => Some(("Content-Encoding", "br")), + } + } + + pub fn iterator() -> impl Iterator { + [Self::Plain, Self::Gzip, Self::Deflate, Self::Brotli].iter().copied() + } +} diff --git a/meilisearch-http/tests/common/index.rs b/meilisearch-http/tests/common/index.rs index 90d138ced..dac3653f7 100644 --- a/meilisearch-http/tests/common/index.rs +++ b/meilisearch-http/tests/common/index.rs @@ -1,34 +1,32 @@ -use std::{ - fmt::Write, - panic::{catch_unwind, resume_unwind, UnwindSafe}, - time::Duration, -}; +use std::fmt::Write; +use std::panic::{catch_unwind, resume_unwind, UnwindSafe}; +use std::time::Duration; use actix_web::http::StatusCode; use serde_json::{json, Value}; use tokio::time::sleep; -use urlencoding::encode; +use urlencoding::encode as urlencode; +use super::encoder::Encoder; use super::service::Service; pub struct Index<'a> { pub uid: String, pub service: &'a Service, + pub encoder: Encoder, } #[allow(dead_code)] impl Index<'_> { pub async fn get(&self) -> (Value, StatusCode) { - let url = format!("/indexes/{}", encode(self.uid.as_ref())); + let url = format!("/indexes/{}", urlencode(self.uid.as_ref())); self.service.get(url).await } pub async fn load_test_set(&self) -> u64 { - let url = format!("/indexes/{}/documents", encode(self.uid.as_ref())); - let (response, code) = self - .service - .post_str(url, include_str!("../assets/test_set.json")) - .await; + let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref())); + let (response, code) = + self.service.post_str(url, include_str!("../assets/test_set.json")).await; assert_eq!(code, 202); let update_id = response["taskUid"].as_i64().unwrap(); self.wait_task(update_id as u64).await; @@ -40,20 +38,20 @@ impl Index<'_> { "uid": self.uid, "primaryKey": primary_key, }); - self.service.post("/indexes", body).await + self.service.post_encoded("/indexes", body, self.encoder).await } pub async fn update(&self, primary_key: Option<&str>) -> (Value, StatusCode) { let body = json!({ "primaryKey": primary_key, }); - let url = format!("/indexes/{}", encode(self.uid.as_ref())); + let url = format!("/indexes/{}", urlencode(self.uid.as_ref())); - self.service.patch(url, body).await + self.service.patch_encoded(url, body, self.encoder).await } pub async fn delete(&self) -> (Value, StatusCode) { - let url = format!("/indexes/{}", encode(self.uid.as_ref())); + let url = format!("/indexes/{}", urlencode(self.uid.as_ref())); self.service.delete(url).await } @@ -63,14 +61,12 @@ impl 
Index<'_> { primary_key: Option<&str>, ) -> (Value, StatusCode) { let url = match primary_key { - Some(key) => format!( - "/indexes/{}/documents?primaryKey={}", - encode(self.uid.as_ref()), - key - ), - None => format!("/indexes/{}/documents", encode(self.uid.as_ref())), + Some(key) => { + format!("/indexes/{}/documents?primaryKey={}", urlencode(self.uid.as_ref()), key) + } + None => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())), }; - self.service.post(url, documents).await + self.service.post_encoded(url, documents, self.encoder).await } pub async fn update_documents( @@ -79,14 +75,12 @@ impl Index<'_> { primary_key: Option<&str>, ) -> (Value, StatusCode) { let url = match primary_key { - Some(key) => format!( - "/indexes/{}/documents?primaryKey={}", - encode(self.uid.as_ref()), - key - ), - None => format!("/indexes/{}/documents", encode(self.uid.as_ref())), + Some(key) => { + format!("/indexes/{}/documents?primaryKey={}", urlencode(self.uid.as_ref()), key) + } + None => format!("/indexes/{}/documents", urlencode(self.uid.as_ref())), }; - self.service.put(url, documents).await + self.service.put_encoded(url, documents, self.encoder).await } pub async fn wait_task(&self, update_id: u64) -> Value { @@ -112,17 +106,17 @@ impl Index<'_> { } pub async fn list_tasks(&self) -> (Value, StatusCode) { - let url = format!("/tasks?indexUid={}", self.uid); + let url = format!("/tasks?indexUids={}", self.uid); self.service.get(url).await } - pub async fn filtered_tasks(&self, type_: &[&str], status: &[&str]) -> (Value, StatusCode) { - let mut url = format!("/tasks?indexUid={}", self.uid); - if !type_.is_empty() { - let _ = write!(url, "&type={}", type_.join(",")); + pub async fn filtered_tasks(&self, types: &[&str], statuses: &[&str]) -> (Value, StatusCode) { + let mut url = format!("/tasks?indexUids={}", self.uid); + if !types.is_empty() { + let _ = write!(url, "&types={}", types.join(",")); } - if !status.is_empty() { - let _ = write!(url, "&status={}", status.join(",")); + if !statuses.is_empty() { + let _ = write!(url, "&statuses={}", statuses.join(",")); } self.service.get(url).await } @@ -132,7 +126,7 @@ impl Index<'_> { id: u64, options: Option, ) -> (Value, StatusCode) { - let mut url = format!("/indexes/{}/documents/{}", encode(self.uid.as_ref()), id); + let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id); if let Some(fields) = options.and_then(|o| o.fields) { let _ = write!(url, "?fields={}", fields.join(",")); } @@ -140,7 +134,7 @@ impl Index<'_> { } pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) { - let mut url = format!("/indexes/{}/documents?", encode(self.uid.as_ref())); + let mut url = format!("/indexes/{}/documents?", urlencode(self.uid.as_ref())); if let Some(limit) = options.limit { let _ = write!(url, "limit={}&", limit); } @@ -157,42 +151,37 @@ impl Index<'_> { } pub async fn delete_document(&self, id: u64) -> (Value, StatusCode) { - let url = format!("/indexes/{}/documents/{}", encode(self.uid.as_ref()), id); + let url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id); self.service.delete(url).await } pub async fn clear_all_documents(&self) -> (Value, StatusCode) { - let url = format!("/indexes/{}/documents", encode(self.uid.as_ref())); + let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref())); self.service.delete(url).await } pub async fn delete_batch(&self, ids: Vec) -> (Value, StatusCode) { - let url = format!( - 
"/indexes/{}/documents/delete-batch", - encode(self.uid.as_ref()) - ); - self.service - .post(url, serde_json::to_value(&ids).unwrap()) - .await + let url = format!("/indexes/{}/documents/delete-batch", urlencode(self.uid.as_ref())); + self.service.post_encoded(url, serde_json::to_value(&ids).unwrap(), self.encoder).await } pub async fn settings(&self) -> (Value, StatusCode) { - let url = format!("/indexes/{}/settings", encode(self.uid.as_ref())); + let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref())); self.service.get(url).await } pub async fn update_settings(&self, settings: Value) -> (Value, StatusCode) { - let url = format!("/indexes/{}/settings", encode(self.uid.as_ref())); - self.service.patch(url, settings).await + let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref())); + self.service.patch_encoded(url, settings, self.encoder).await } pub async fn delete_settings(&self) -> (Value, StatusCode) { - let url = format!("/indexes/{}/settings", encode(self.uid.as_ref())); + let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref())); self.service.delete(url).await } pub async fn stats(&self) -> (Value, StatusCode) { - let url = format!("/indexes/{}/stats", encode(self.uid.as_ref())); + let url = format!("/indexes/{}/stats", urlencode(self.uid.as_ref())); self.service.get(url).await } @@ -217,31 +206,25 @@ impl Index<'_> { } pub async fn search_post(&self, query: Value) -> (Value, StatusCode) { - let url = format!("/indexes/{}/search", encode(self.uid.as_ref())); - self.service.post(url, query).await + let url = format!("/indexes/{}/search", urlencode(self.uid.as_ref())); + self.service.post_encoded(url, query, self.encoder).await } pub async fn search_get(&self, query: Value) -> (Value, StatusCode) { let params = yaup::to_string(&query).unwrap(); - let url = format!("/indexes/{}/search?{}", encode(self.uid.as_ref()), params); + let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), params); self.service.get(url).await } pub async fn update_distinct_attribute(&self, value: Value) -> (Value, StatusCode) { - let url = format!( - "/indexes/{}/settings/{}", - encode(self.uid.as_ref()), - "distinct-attribute" - ); - self.service.put(url, value).await + let url = + format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute"); + self.service.put_encoded(url, value, self.encoder).await } pub async fn get_distinct_attribute(&self) -> (Value, StatusCode) { - let url = format!( - "/indexes/{}/settings/{}", - encode(self.uid.as_ref()), - "distinct-attribute" - ); + let url = + format!("/indexes/{}/settings/{}", urlencode(self.uid.as_ref()), "distinct-attribute"); self.service.get(url).await } } diff --git a/meilisearch-http/tests/common/mod.rs b/meilisearch-http/tests/common/mod.rs index b076b0ea5..9c6d572d9 100644 --- a/meilisearch-http/tests/common/mod.rs +++ b/meilisearch-http/tests/common/mod.rs @@ -1,3 +1,4 @@ +pub mod encoder; pub mod index; pub mod server; pub mod service; @@ -14,18 +15,10 @@ macro_rules! 
test_post_get_search { let get_query: meilisearch_http::routes::search::SearchQuery = post_query.into(); let get_query = ::serde_url_params::to_string(&get_query).unwrap(); let ($response, $status_code) = $server.search_get(&get_query).await; - let _ = ::std::panic::catch_unwind(|| $block).map_err(|e| { - panic!( - "panic in get route: {:?}", - e.downcast_ref::<&str>().unwrap() - ) - }); + let _ = ::std::panic::catch_unwind(|| $block) + .map_err(|e| panic!("panic in get route: {:?}", e.downcast_ref::<&str>().unwrap())); let ($response, $status_code) = $server.search_post($query).await; - let _ = ::std::panic::catch_unwind(|| $block).map_err(|e| { - panic!( - "panic in post route: {:?}", - e.downcast_ref::<&str>().unwrap() - ) - }); + let _ = ::std::panic::catch_unwind(|| $block) + .map_err(|e| panic!("panic in post route: {:?}", e.downcast_ref::<&str>().unwrap())); }; } diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index 30a64c90e..3f72248c5 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -1,21 +1,23 @@ #![allow(dead_code)] -use clap::Parser; use std::path::Path; +use std::time::Duration; +use actix_http::body::MessageBody; +use actix_web::dev::ServiceResponse; use actix_web::http::StatusCode; use byte_unit::{Byte, ByteUnit}; -use meilisearch_auth::AuthController; -use meilisearch_http::setup_meilisearch; -use meilisearch_lib::options::{IndexerOpts, MaxMemory}; +use clap::Parser; +use meilisearch_http::option::{IndexerOpts, MaxMemory, Opt}; +use meilisearch_http::{analytics, create_app, setup_meilisearch}; use once_cell::sync::Lazy; -use serde_json::Value; +use serde_json::{json, Value}; use tempfile::TempDir; - -use meilisearch_http::option::Opt; +use tokio::time::sleep; use super::index::Index; use super::service::Service; +use crate::common::encoder::Encoder; pub struct Server { pub service: Service, @@ -37,19 +39,10 @@ impl Server { let options = default_settings(dir.path()); - let meilisearch = setup_meilisearch(&options).unwrap(); - let auth = AuthController::new(&options.db_path, &options.master_key).unwrap(); - let service = Service { - meilisearch, - auth, - options, - api_key: None, - }; + let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); + let service = Service { index_scheduler, auth, options, api_key: None }; - Server { - service, - _dir: Some(dir), - } + Server { service, _dir: Some(dir) } } pub async fn new_auth_with_options(mut options: Opt, dir: TempDir) -> Self { @@ -61,19 +54,10 @@ impl Server { options.master_key = Some("MASTER_KEY".to_string()); - let meilisearch = setup_meilisearch(&options).unwrap(); - let auth = AuthController::new(&options.db_path, &options.master_key).unwrap(); - let service = Service { - meilisearch, - auth, - options, - api_key: None, - }; + let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); + let service = Service { index_scheduler, auth, options, api_key: None }; - Server { - service, - _dir: Some(dir), - } + Server { service, _dir: Some(dir) } } pub async fn new_auth() -> Self { @@ -83,27 +67,36 @@ impl Server { } pub async fn new_with_options(options: Opt) -> Result { - let meilisearch = setup_meilisearch(&options)?; - let auth = AuthController::new(&options.db_path, &options.master_key)?; - let service = Service { - meilisearch, - auth, - options, - api_key: None, - }; + let (index_scheduler, auth) = setup_meilisearch(&options)?; + let service = Service { index_scheduler, auth, options, api_key: None }; - 
Ok(Server { - service, - _dir: None, - }) + Ok(Server { service, _dir: None }) + } + + pub async fn init_web_app( + &self, + ) -> impl actix_web::dev::Service< + actix_http::Request, + Response = ServiceResponse, + Error = actix_web::Error, + > { + actix_web::test::init_service(create_app( + self.service.index_scheduler.clone().into(), + self.service.auth.clone(), + self.service.options.clone(), + analytics::MockAnalytics::new(&self.service.options), + true, + )) + .await } /// Returns a view to an index. There is no guarantee that the index exists. pub fn index(&self, uid: impl AsRef) -> Index<'_> { - Index { - uid: uid.as_ref().to_string(), - service: &self.service, - } + self.index_with_encoder(uid, Encoder::Plain) + } + + pub fn index_with_encoder(&self, uid: impl AsRef, encoder: Encoder) -> Index<'_> { + Index { uid: uid.as_ref().to_string(), service: &self.service, encoder } } pub async fn list_indexes( @@ -121,9 +114,7 @@ impl Server { .map(|(offset, limit)| format!("{offset}&{limit}")) .or_else(|| offset.xor(limit)); if let Some(query_parameter) = query_parameter { - self.service - .get(format!("/indexes?{query_parameter}")) - .await + self.service.get(format!("/indexes?{query_parameter}")).await } else { self.service.get("/indexes").await } @@ -141,9 +132,53 @@ impl Server { self.service.get("/tasks").await } + pub async fn tasks_filter(&self, filter: Value) -> (Value, StatusCode) { + self.service.get(format!("/tasks?{}", yaup::to_string(&filter).unwrap())).await + } + pub async fn get_dump_status(&self, uid: &str) -> (Value, StatusCode) { self.service.get(format!("/dumps/{}/status", uid)).await } + + pub async fn create_dump(&self) -> (Value, StatusCode) { + self.service.post("/dumps", json!(null)).await + } + + pub async fn index_swap(&self, value: Value) -> (Value, StatusCode) { + self.service.post("/swap-indexes", value).await + } + + pub async fn cancel_tasks(&self, value: Value) -> (Value, StatusCode) { + self.service + .post(format!("/tasks/cancel?{}", yaup::to_string(&value).unwrap()), json!(null)) + .await + } + + pub async fn delete_tasks(&self, value: Value) -> (Value, StatusCode) { + self.service.delete(format!("/tasks?{}", yaup::to_string(&value).unwrap())).await + } + + pub async fn wait_task(&self, update_id: u64) -> Value { + // try several times to get status, or panic to not wait forever + let url = format!("/tasks/{}", update_id); + for _ in 0..100 { + let (response, status_code) = self.service.get(&url).await; + assert_eq!(200, status_code, "response: {}", response); + + if response["status"] == "succeeded" || response["status"] == "failed" { + return response; + } + + // wait 0.5 second. 
+ sleep(Duration::from_millis(500)).await; + } + panic!("Timeout waiting for update id"); + } + + pub async fn get_task(&self, update_id: u64) -> (Value, StatusCode) { + let url = format!("/tasks/{}", update_id); + self.service.get(url).await + } } pub fn default_settings(dir: impl AsRef) -> Opt { diff --git a/meilisearch-http/tests/common/service.rs b/meilisearch-http/tests/common/service.rs index 0834abf8d..945ff4c13 100644 --- a/meilisearch-http/tests/common/service.rs +++ b/meilisearch-http/tests/common/service.rs @@ -1,12 +1,18 @@ -use actix_web::{http::StatusCode, test}; +use std::sync::Arc; + +use actix_web::http::header::ContentType; +use actix_web::http::StatusCode; +use actix_web::test; +use actix_web::test::TestRequest; +use index_scheduler::IndexScheduler; use meilisearch_auth::AuthController; -use meilisearch_lib::MeiliSearch; +use meilisearch_http::{analytics, create_app, Opt}; use serde_json::Value; -use meilisearch_http::{analytics, create_app, Opt}; +use crate::common::encoder::Encoder; pub struct Service { - pub meilisearch: MeiliSearch, + pub index_scheduler: Arc, pub auth: AuthController, pub options: Opt, pub api_key: Option, @@ -14,26 +20,18 @@ pub struct Service { impl Service { pub async fn post(&self, url: impl AsRef, body: Value) -> (Value, StatusCode) { - let app = test::init_service(create_app!( - &self.meilisearch, - &self.auth, - true, - self.options, - analytics::MockAnalytics::new(&self.options).0 - )) - .await; + self.post_encoded(url, body, Encoder::Plain).await + } - let mut req = test::TestRequest::post().uri(url.as_ref()).set_json(&body); - if let Some(api_key) = &self.api_key { - req = req.insert_header(("Authorization", ["Bearer ", api_key].concat())); - } - let req = req.to_request(); - let res = test::call_service(&app, req).await; - let status_code = res.status(); - - let body = test::read_body(res).await; - let response = serde_json::from_slice(&body).unwrap_or_default(); - (response, status_code) + pub async fn post_encoded( + &self, + url: impl AsRef, + body: Value, + encoder: Encoder, + ) -> (Value, StatusCode) { + let mut req = test::TestRequest::post().uri(url.as_ref()); + req = self.encode(req, body, encoder); + self.request(req).await } /// Send a test post request from a text body, with a `content-type:application/json` header. 
@@ -42,111 +40,63 @@ impl Service { url: impl AsRef, body: impl AsRef, ) -> (Value, StatusCode) { - let app = test::init_service(create_app!( - &self.meilisearch, - &self.auth, - true, - self.options, - analytics::MockAnalytics::new(&self.options).0 - )) - .await; - - let mut req = test::TestRequest::post() + let req = test::TestRequest::post() .uri(url.as_ref()) .set_payload(body.as_ref().to_string()) .insert_header(("content-type", "application/json")); - if let Some(api_key) = &self.api_key { - req = req.insert_header(("Authorization", ["Bearer ", api_key].concat())); - } - let req = req.to_request(); - let res = test::call_service(&app, req).await; - let status_code = res.status(); - - let body = test::read_body(res).await; - let response = serde_json::from_slice(&body).unwrap_or_default(); - (response, status_code) + self.request(req).await } pub async fn get(&self, url: impl AsRef) -> (Value, StatusCode) { - let app = test::init_service(create_app!( - &self.meilisearch, - &self.auth, - true, - self.options, - analytics::MockAnalytics::new(&self.options).0 - )) - .await; - - let mut req = test::TestRequest::get().uri(url.as_ref()); - if let Some(api_key) = &self.api_key { - req = req.insert_header(("Authorization", ["Bearer ", api_key].concat())); - } - let req = req.to_request(); - let res = test::call_service(&app, req).await; - let status_code = res.status(); - - let body = test::read_body(res).await; - let response = serde_json::from_slice(&body).unwrap_or_default(); - (response, status_code) + let req = test::TestRequest::get().uri(url.as_ref()); + self.request(req).await } pub async fn put(&self, url: impl AsRef, body: Value) -> (Value, StatusCode) { - let app = test::init_service(create_app!( - &self.meilisearch, - &self.auth, - true, - self.options, - analytics::MockAnalytics::new(&self.options).0 - )) - .await; + self.put_encoded(url, body, Encoder::Plain).await + } - let mut req = test::TestRequest::put().uri(url.as_ref()).set_json(&body); - if let Some(api_key) = &self.api_key { - req = req.insert_header(("Authorization", ["Bearer ", api_key].concat())); - } - let req = req.to_request(); - let res = test::call_service(&app, req).await; - let status_code = res.status(); - - let body = test::read_body(res).await; - let response = serde_json::from_slice(&body).unwrap_or_default(); - (response, status_code) + pub async fn put_encoded( + &self, + url: impl AsRef, + body: Value, + encoder: Encoder, + ) -> (Value, StatusCode) { + let mut req = test::TestRequest::put().uri(url.as_ref()); + req = self.encode(req, body, encoder); + self.request(req).await } pub async fn patch(&self, url: impl AsRef, body: Value) -> (Value, StatusCode) { - let app = test::init_service(create_app!( - &self.meilisearch, - &self.auth, - true, - self.options, - analytics::MockAnalytics::new(&self.options).0 - )) - .await; + self.patch_encoded(url, body, Encoder::Plain).await + } - let mut req = test::TestRequest::patch().uri(url.as_ref()).set_json(&body); - if let Some(api_key) = &self.api_key { - req = req.insert_header(("Authorization", ["Bearer ", api_key].concat())); - } - let req = req.to_request(); - let res = test::call_service(&app, req).await; - let status_code = res.status(); - - let body = test::read_body(res).await; - let response = serde_json::from_slice(&body).unwrap_or_default(); - (response, status_code) + pub async fn patch_encoded( + &self, + url: impl AsRef, + body: Value, + encoder: Encoder, + ) -> (Value, StatusCode) { + let mut req = test::TestRequest::patch().uri(url.as_ref()); + 
req = self.encode(req, body, encoder); + self.request(req).await } pub async fn delete(&self, url: impl AsRef) -> (Value, StatusCode) { - let app = test::init_service(create_app!( - &self.meilisearch, - &self.auth, + let req = test::TestRequest::delete().uri(url.as_ref()); + self.request(req).await + } + + pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { + let app = test::init_service(create_app( + self.index_scheduler.clone().into(), + self.auth.clone(), + self.options.clone(), + analytics::MockAnalytics::new(&self.options), true, - self.options, - analytics::MockAnalytics::new(&self.options).0 )) .await; - let mut req = test::TestRequest::delete().uri(url.as_ref()); if let Some(api_key) = &self.api_key { req = req.insert_header(("Authorization", ["Bearer ", api_key].concat())); } @@ -158,4 +108,16 @@ impl Service { let response = serde_json::from_slice(&body).unwrap_or_default(); (response, status_code) } + + fn encode(&self, req: TestRequest, body: Value, encoder: Encoder) -> TestRequest { + let bytes = serde_json::to_string(&body).expect("Failed to serialize test data to json"); + let encoded_body = encoder.encode(bytes); + let header = encoder.header(); + match header { + Some(header) => req.insert_header(header), + None => req, + } + .set_payload(encoded_body) + .insert_header(ContentType::json()) + } } diff --git a/meilisearch-http/tests/content_type.rs b/meilisearch-http/tests/content_type.rs index 47e224bd1..e16a83c06 100644 --- a/meilisearch-http/tests/content_type.rs +++ b/meilisearch-http/tests/content_type.rs @@ -2,11 +2,11 @@ mod common; -use crate::common::Server; use actix_web::test; -use meilisearch_http::{analytics, create_app}; use serde_json::{json, Value}; +use crate::common::Server; + enum HttpVerb { Put, Patch, @@ -59,14 +59,8 @@ async fn error_json_bad_content_type() { let document = "{}"; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + for (verb, route) in routes { // Good content-type, we probably have an error since we didn't send anything in the json // so we only ensure we didn't get a bad media type error. @@ -82,11 +76,7 @@ async fn error_json_bad_content_type() { "calling the route `{}` with a content-type of json isn't supposed to throw a bad media type error", route); // No content-type. - let req = verb - .test_request() - .uri(route) - .set_payload(document) - .to_request(); + let req = verb.test_request().uri(route).set_payload(document).to_request(); let res = test::call_service(&app, req).await; let status_code = res.status(); let body = test::read_body(res).await; @@ -142,14 +132,7 @@ async fn extract_actual_content_type() { let route = "/indexes/doggo/documents"; let documents = "[{}]"; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; // Good content-type, we probably have an error since we didn't send anything in the json // so we only ensure we didn't get a bad media type error. 
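Note on the refactor above: every verb on `Service` now funnels through `encode` and `request`, and the compression itself lives in a new `common/encoder.rs` module that this diff does not show. As a rough, hypothetical sketch of what such a test-only helper could look like (crate choices, compression parameters, and the variant set are assumptions, not taken from this PR):

```rust
// Hypothetical sketch of a test-only `Encoder`, assuming the `flate2` and
// `brotli` crates; the real module added by this PR may differ.
use std::io::Write;

use flate2::write::{GzEncoder, ZlibEncoder};
use flate2::Compression;

#[derive(Clone, Copy)]
pub enum Encoder {
    Plain,
    Gzip,
    Deflate,
    Brotli,
}

impl Encoder {
    /// Compress a request body with the selected encoding.
    pub fn encode(self, body: impl Into<Vec<u8>>) -> Vec<u8> {
        let body = body.into();
        match self {
            Encoder::Plain => body,
            Encoder::Gzip => {
                let mut e = GzEncoder::new(Vec::new(), Compression::default());
                e.write_all(&body).expect("failed to gzip the body");
                e.finish().expect("failed to finish the gzip stream")
            }
            Encoder::Deflate => {
                let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
                e.write_all(&body).expect("failed to deflate the body");
                e.finish().expect("failed to finish the zlib stream")
            }
            Encoder::Brotli => {
                let mut out = Vec::new();
                {
                    // quality 11 and lgwin 22 are illustrative defaults
                    let mut w = brotli::CompressorWriter::new(&mut out, 4096, 11, 22);
                    w.write_all(&body).expect("failed to brotli-compress the body");
                }
                out
            }
        }
    }

    /// `Content-Encoding` header to attach to the request; `Plain` sends none.
    pub fn header(self) -> Option<(&'static str, &'static str)> {
        match self {
            Encoder::Plain => None,
            Encoder::Gzip => Some(("Content-Encoding", "gzip")),
            Encoder::Deflate => Some(("Content-Encoding", "deflate")),
            Encoder::Brotli => Some(("Content-Encoding", "br")),
        }
    }

    /// All encodings, as iterated by `add_single_document_with_every_encoding` below.
    pub fn iterator() -> impl Iterator<Item = Self> {
        [Encoder::Plain, Encoder::Gzip, Encoder::Deflate, Encoder::Brotli].into_iter()
    }
}
```

The tests further down also call a `decode` counterpart (e.g. `Encoder::Gzip.decode(bytes)`) to unpack compressed responses; a sketch of that direction follows the response-compression test.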
diff --git a/meilisearch-http/tests/dashboard/mod.rs b/meilisearch-http/tests/dashboard/mod.rs index d097cfd4b..2699cd16f 100644 --- a/meilisearch-http/tests/dashboard/mod.rs +++ b/meilisearch-http/tests/dashboard/mod.rs @@ -1,5 +1,6 @@ use crate::common::Server; +#[cfg(feature = "mini-dashboard")] #[actix_rt::test] async fn dashboard_assets_load() { let server = Server::new().await; diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index 685428784..6f1fabeae 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -1,9 +1,10 @@ -use crate::common::{GetAllDocumentsOptions, Server}; use actix_web::test; - -use meilisearch_http::{analytics, create_app}; use serde_json::{json, Value}; -use time::{format_description::well_known::Rfc3339, OffsetDateTime}; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; + +use crate::common::encoder::Encoder; +use crate::common::{GetAllDocumentsOptions, Server}; /// This is the basic usage of our API and every other tests uses the content-type application/json #[actix_rt::test] @@ -17,14 +18,8 @@ async fn add_documents_test_json_content_types() { // this is a what is expected and should work let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -62,14 +57,8 @@ async fn add_single_document_test_json_content_types() { // this is a what is expected and should work let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -97,6 +86,81 @@ async fn add_single_document_test_json_content_types() { assert_eq!(response["taskUid"], 1); } +/// Here we try sending encoded (compressed) document request +#[actix_rt::test] +async fn add_single_document_gzip_encoded() { + let document = json!({ + "id": 1, + "content": "Bouvier Bernois", + }); + + // this is a what is expected and should work + let server = Server::new().await; + let app = server.init_web_app().await; + // post + let document = serde_json::to_string(&document).unwrap(); + let encoder = Encoder::Gzip; + let req = test::TestRequest::post() + .uri("/indexes/dog/documents") + .set_payload(encoder.encode(document.clone())) + .insert_header(("content-type", "application/json")) + .insert_header(encoder.header().unwrap()) + .to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + assert_eq!(status_code, 202); + assert_eq!(response["taskUid"], 0); + + // put + let req = test::TestRequest::put() + .uri("/indexes/dog/documents") + .set_payload(encoder.encode(document)) + .insert_header(("content-type", "application/json")) + .insert_header(encoder.header().unwrap()) + .to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + 
let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + assert_eq!(status_code, 202); + assert_eq!(response["taskUid"], 1); +} + +/// Here we try document request with every encoding +#[actix_rt::test] +async fn add_single_document_with_every_encoding() { + let document = json!({ + "id": 1, + "content": "Bouvier Bernois", + }); + + // this is a what is expected and should work + let server = Server::new().await; + let app = server.init_web_app().await; + // post + let document = serde_json::to_string(&document).unwrap(); + + for (task_uid, encoder) in Encoder::iterator().enumerate() { + let mut req = test::TestRequest::post() + .uri("/indexes/dog/documents") + .set_payload(encoder.encode(document.clone())) + .insert_header(("content-type", "application/json")); + req = match encoder.header() { + Some(header) => req.insert_header(header), + None => req, + }; + let req = req.to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + assert_eq!(status_code, 202); + assert_eq!(response["taskUid"], task_uid); + } +} + /// any other content-type is must be refused #[actix_rt::test] async fn error_add_documents_test_bad_content_types() { @@ -108,14 +172,8 @@ async fn error_add_documents_test_bad_content_types() { ]); let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -135,10 +193,7 @@ async fn error_add_documents_test_bad_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); // put let req = test::TestRequest::put() @@ -159,10 +214,7 @@ async fn error_add_documents_test_bad_content_types() { ); assert_eq!(response["code"], "invalid_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#invalid_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#invalid_content_type"); } /// missing content-type must be refused @@ -176,14 +228,8 @@ async fn error_add_documents_test_no_content_type() { ]); let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -202,10 +248,7 @@ async fn error_add_documents_test_no_content_type() { ); assert_eq!(response["code"], "missing_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#missing_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#missing_content_type"); // put let req = test::TestRequest::put() @@ -225,10 +268,7 @@ async fn error_add_documents_test_no_content_type() { ); 
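The two gzip tests above drive the raw `actix_web::test::TestRequest` API directly. For routes that accept JSON, the same round trip can also go through the refactored `Service` helpers; the following is purely illustrative (index uid and expected task uid are made up for the example, not taken from this PR):

```rust
use serde_json::json;

use crate::common::encoder::Encoder;
use crate::common::Server;

#[actix_rt::test]
async fn post_documents_gzip_via_service_helper() {
    // Illustrative only: sends a gzip-compressed JSON body through the new
    // Service::post_encoded -> encode -> request pipeline.
    let server = Server::new().await;
    let (response, code) = server
        .service
        .post_encoded(
            "/indexes/dog/documents",
            json!([{ "id": 1, "content": "Bouvier Bernois" }]),
            Encoder::Gzip,
        )
        .await;
    assert_eq!(code, 202);
    assert_eq!(response["taskUid"], 0);
}
```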
assert_eq!(response["code"], "missing_content_type"); assert_eq!(response["type"], "invalid_request"); - assert_eq!( - response["link"], - "https://docs.meilisearch.com/errors#missing_content_type" - ); + assert_eq!(response["link"], "https://docs.meilisearch.com/errors#missing_content_type"); } #[actix_rt::test] @@ -236,14 +276,8 @@ async fn error_add_malformed_csv_documents() { let document = "id, content\n1234, hello, world\n12, hello world"; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -263,10 +297,7 @@ async fn error_add_malformed_csv_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // put let req = test::TestRequest::put() @@ -287,10 +318,7 @@ async fn error_add_malformed_csv_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); } #[actix_rt::test] @@ -298,14 +326,8 @@ async fn error_add_malformed_json_documents() { let document = r#"[{"id": 1}, {id: 2}]"#; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -325,10 +347,7 @@ async fn error_add_malformed_json_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // put let req = test::TestRequest::put() @@ -349,10 +368,7 @@ async fn error_add_malformed_json_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // truncate @@ -372,15 +388,12 @@ async fn error_add_malformed_json_documents() { assert_eq!( response["message"], json!( - r#"The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`."# + r#"The `json` payload provided is malformed. 
`Couldn't serialize document value: data are neither an object nor a list of objects`."# ) ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // add one more char to the long string to test if the truncating works. let document = format!("\"{}m\"", long); @@ -395,14 +408,11 @@ async fn error_add_malformed_json_documents() { assert_eq!(status_code, 400); assert_eq!( response["message"], - json!("The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`.") + json!("The `json` payload provided is malformed. `Couldn't serialize document value: data are neither an object nor a list of objects`.") ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); } #[actix_rt::test] @@ -410,14 +420,8 @@ async fn error_add_malformed_ndjson_documents() { let document = "{\"id\": 1}\n{id: 2}"; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -437,10 +441,7 @@ async fn error_add_malformed_ndjson_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); // put let req = test::TestRequest::put() @@ -459,10 +460,7 @@ async fn error_add_malformed_ndjson_documents() { ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#malformed_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#malformed_payload")); } #[actix_rt::test] @@ -470,14 +468,8 @@ async fn error_add_missing_payload_csv_documents() { let document = ""; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -492,10 +484,7 @@ async fn error_add_missing_payload_csv_documents() { assert_eq!(response["message"], json!(r#"A csv payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); // put let req = test::TestRequest::put() @@ -511,10 +500,7 @@ 
async fn error_add_missing_payload_csv_documents() { assert_eq!(response["message"], json!(r#"A csv payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); } #[actix_rt::test] @@ -522,14 +508,8 @@ async fn error_add_missing_payload_json_documents() { let document = ""; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -544,10 +524,7 @@ async fn error_add_missing_payload_json_documents() { assert_eq!(response["message"], json!(r#"A json payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); // put let req = test::TestRequest::put() @@ -563,10 +540,7 @@ async fn error_add_missing_payload_json_documents() { assert_eq!(response["message"], json!(r#"A json payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); } #[actix_rt::test] @@ -574,14 +548,8 @@ async fn error_add_missing_payload_ndjson_documents() { let document = ""; let server = Server::new().await; - let app = test::init_service(create_app!( - &server.service.meilisearch, - &server.service.auth, - true, - server.service.options, - analytics::MockAnalytics::new(&server.service.options).0 - )) - .await; + let app = server.init_web_app().await; + // post let req = test::TestRequest::post() .uri("/indexes/dog/documents") @@ -593,16 +561,10 @@ async fn error_add_missing_payload_ndjson_documents() { let body = test::read_body(res).await; let response: Value = serde_json::from_slice(&body).unwrap_or_default(); assert_eq!(status_code, 400); - assert_eq!( - response["message"], - json!(r#"A ndjson payload is missing."#) - ); + assert_eq!(response["message"], json!(r#"A ndjson payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); // put let req = test::TestRequest::put() @@ -615,16 +577,10 @@ async fn error_add_missing_payload_ndjson_documents() { let body = test::read_body(res).await; let response: Value = serde_json::from_slice(&body).unwrap_or_default(); assert_eq!(status_code, 400); - assert_eq!( - response["message"], - json!(r#"A ndjson payload is missing."#) - ); + assert_eq!(response["message"], json!(r#"A ndjson payload is missing."#)); assert_eq!(response["code"], json!("missing_payload")); assert_eq!(response["type"], 
json!("invalid_request")); - assert_eq!( - response["link"], - json!("https://docs.meilisearch.com/errors#missing_payload") - ); + assert_eq!(response["link"], json!("https://docs.meilisearch.com/errors#missing_payload")); } #[actix_rt::test] @@ -680,24 +636,7 @@ async fn error_document_add_create_index_bad_uid() { let (response, code) = index.add_documents(json!([{"id": 1}]), None).await; let expected_response = json!({ - "message": "invalid index uid `883 fj!`, the uid must be an integer or a string containing only alphanumeric characters a-z A-Z 0-9, hyphens - and underscores _.", - "code": "invalid_index_uid", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_index_uid" - }); - - assert_eq!(code, 400); - assert_eq!(response, expected_response); -} - -#[actix_rt::test] -async fn error_document_update_create_index_bad_uid() { - let server = Server::new().await; - let index = server.index("883 fj!"); - let (response, code) = index.update_documents(json!([{"id": 1}]), None).await; - - let expected_response = json!({ - "message": "invalid index uid `883 fj!`, the uid must be an integer or a string containing only alphanumeric characters a-z A-Z 0-9, hyphens - and underscores _.", + "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -736,35 +675,6 @@ async fn document_addition_with_primary_key() { assert_eq!(response["primaryKey"], "primary"); } -#[actix_rt::test] -async fn document_update_with_primary_key() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = json!([ - { - "primary": 1, - "content": "foo", - } - ]); - let (_response, code) = index.update_documents(documents, Some("primary")).await; - assert_eq!(code, 202); - - index.wait_task(0).await; - - let (response, code) = index.get_task(0).await; - assert_eq!(code, 200); - assert_eq!(response["status"], "succeeded"); - assert_eq!(response["uid"], 0); - assert_eq!(response["type"], "documentAdditionOrUpdate"); - assert_eq!(response["details"]["indexedDocuments"], 1); - assert_eq!(response["details"]["receivedDocuments"], 1); - - let (response, code) = index.get().await; - assert_eq!(code, 200); - assert_eq!(response["primaryKey"], "primary"); -} - #[actix_rt::test] async fn replace_document() { let server = Server::new().await; @@ -811,47 +721,6 @@ async fn add_no_documents() { assert_eq!(code, 202); } -#[actix_rt::test] -async fn update_document() { - let server = Server::new().await; - let index = server.index("test"); - - let documents = json!([ - { - "doc_id": 1, - "content": "foo", - } - ]); - - let (_response, code) = index.add_documents(documents, None).await; - assert_eq!(code, 202); - - index.wait_task(0).await; - - let documents = json!([ - { - "doc_id": 1, - "other": "bar", - } - ]); - - let (response, code) = index.update_documents(documents, None).await; - assert_eq!(code, 202, "response: {}", response); - - index.wait_task(1).await; - - let (response, code) = index.get_task(1).await; - assert_eq!(code, 200); - assert_eq!(response["status"], "succeeded"); - - let (response, code) = index.get_document(1, None).await; - assert_eq!(code, 200); - assert_eq!( - response.to_string(), - r##"{"doc_id":1,"content":"foo","other":"bar"}"## - ); -} - #[actix_rt::test] async fn add_larger_dataset() { let server = 
Server::new().await; @@ -864,36 +733,12 @@ async fn add_larger_dataset() { assert_eq!(response["details"]["indexedDocuments"], 77); assert_eq!(response["details"]["receivedDocuments"], 77); let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - limit: Some(1000), - ..Default::default() - }) + .get_all_documents(GetAllDocumentsOptions { limit: Some(1000), ..Default::default() }) .await; assert_eq!(code, 200, "failed with `{}`", response); assert_eq!(response["results"].as_array().unwrap().len(), 77); } -#[actix_rt::test] -async fn update_larger_dataset() { - let server = Server::new().await; - let index = server.index("test"); - let documents = serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(); - index.update_documents(documents, None).await; - index.wait_task(0).await; - let (response, code) = index.get_task(0).await; - assert_eq!(code, 200); - assert_eq!(response["type"], "documentAdditionOrUpdate"); - assert_eq!(response["details"]["indexedDocuments"], 77); - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - limit: Some(1000), - ..Default::default() - }) - .await; - assert_eq!(code, 200); - assert_eq!(response["results"].as_array().unwrap().len(), 77); -} - #[actix_rt::test] async fn error_add_documents_bad_document_id() { let server = Server::new().await; @@ -924,34 +769,6 @@ async fn error_add_documents_bad_document_id() { ); } -#[actix_rt::test] -async fn error_update_documents_bad_document_id() { - let server = Server::new().await; - let index = server.index("test"); - index.create(Some("docid")).await; - let documents = json!([ - { - "docid": "foo & bar", - "content": "foobar" - } - ]); - index.update_documents(documents, None).await; - let response = index.wait_task(1).await; - assert_eq!(response["status"], json!("failed")); - assert_eq!( - response["error"]["message"], - json!( - r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."# - ) - ); - assert_eq!(response["error"]["code"], json!("invalid_document_id")); - assert_eq!(response["error"]["type"], json!("invalid_request")); - assert_eq!( - response["error"]["link"], - json!("https://docs.meilisearch.com/errors#invalid_document_id") - ); -} - #[actix_rt::test] async fn error_add_documents_missing_document_id() { let server = Server::new().await; @@ -980,32 +797,6 @@ async fn error_add_documents_missing_document_id() { ); } -#[actix_rt::test] -async fn error_update_documents_missing_document_id() { - let server = Server::new().await; - let index = server.index("test"); - index.create(Some("docid")).await; - let documents = json!([ - { - "id": "11", - "content": "foobar" - } - ]); - index.update_documents(documents, None).await; - let response = index.wait_task(1).await; - assert_eq!(response["status"], "failed"); - assert_eq!( - response["error"]["message"], - r#"Document doesn't have a `docid` attribute: `{"id":"11","content":"foobar"}`."# - ); - assert_eq!(response["error"]["code"], "missing_document_id"); - assert_eq!(response["error"]["type"], "invalid_request"); - assert_eq!( - response["error"]["link"], - "https://docs.meilisearch.com/errors#missing_document_id" - ); -} - #[actix_rt::test] #[ignore] // // TODO: Fix in an other PR: this does not provoke any error. 
async fn error_document_field_limit_reached() { @@ -1047,9 +838,7 @@ async fn add_documents_invalid_geo_field() { let server = Server::new().await; let index = server.index("test"); index.create(Some("id")).await; - index - .update_settings(json!({"sortableAttributes": ["_geo"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["_geo"]})).await; let documents = json!([ { @@ -1192,10 +981,7 @@ async fn batch_several_documents_addition() { // Check if there are exactly 120 documents (150 - 30) in the index; let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - limit: Some(200), - ..Default::default() - }) + .get_all_documents(GetAllDocumentsOptions { limit: Some(200), ..Default::default() }) .await; assert_eq!(code, 200, "failed with `{}`", response); assert_eq!(response["results"].as_array().unwrap().len(), 120); diff --git a/meilisearch-http/tests/documents/delete_documents.rs b/meilisearch-http/tests/documents/delete_documents.rs index 8c7ddaa7b..e36e2f033 100644 --- a/meilisearch-http/tests/documents/delete_documents.rs +++ b/meilisearch-http/tests/documents/delete_documents.rs @@ -29,9 +29,7 @@ async fn delete_one_unexisting_document() { async fn delete_one_document() { let server = Server::new().await; let index = server.index("test"); - index - .add_documents(json!([{ "id": 0, "content": "foobar" }]), None) - .await; + index.add_documents(json!([{ "id": 0, "content": "foobar" }]), None).await; index.wait_task(0).await; let (_response, code) = server.index("test").delete_document(0).await; assert_eq!(code, 202); @@ -68,9 +66,7 @@ async fn clear_all_documents() { assert_eq!(code, 202); let _update = index.wait_task(1).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); } @@ -85,9 +81,7 @@ async fn clear_all_documents_empty_index() { assert_eq!(code, 202); let _update = index.wait_task(0).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); } @@ -121,9 +115,7 @@ async fn delete_batch() { assert_eq!(code, 202); let _update = index.wait_task(1).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 1); assert_eq!(response["results"][0]["id"], json!(3)); @@ -139,9 +131,7 @@ async fn delete_no_document_batch() { assert_eq!(code, 202, "{}", _response); let _update = index.wait_task(1).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 3); } diff --git a/meilisearch-http/tests/documents/get_documents.rs b/meilisearch-http/tests/documents/get_documents.rs index c15d3f7fa..9bc54973e 100644 --- a/meilisearch-http/tests/documents/get_documents.rs +++ b/meilisearch-http/tests/documents/get_documents.rs @@ -1,6 +1,10 @@ -use crate::common::{GetAllDocumentsOptions, 
GetDocumentOptions, Server}; +use actix_web::test; +use http::header::ACCEPT_ENCODING; +use serde_json::{json, Value}; +use urlencoding::encode as urlencode; -use serde_json::json; +use crate::common::encoder::Encoder; +use crate::common::{GetAllDocumentsOptions, GetDocumentOptions, Server}; // TODO: partial test since we are testing error, amd error is not yet fully implemented in // transplant @@ -54,14 +58,8 @@ async fn get_document() { }) ); - let (response, code) = index - .get_document( - 0, - Some(GetDocumentOptions { - fields: Some(vec!["id"]), - }), - ) - .await; + let (response, code) = + index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["id"]) })).await; assert_eq!(code, 200); assert_eq!( response, @@ -71,12 +69,7 @@ async fn get_document() { ); let (response, code) = index - .get_document( - 0, - Some(GetDocumentOptions { - fields: Some(vec!["nested.content"]), - }), - ) + .get_document(0, Some(GetDocumentOptions { fields: Some(vec!["nested.content"]) })) .await; assert_eq!(code, 200); assert_eq!( @@ -90,10 +83,8 @@ async fn get_document() { #[actix_rt::test] async fn error_get_unexisting_index_all_documents() { let server = Server::new().await; - let (response, code) = server - .index("test") - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = + server.index("test").get_all_documents(GetAllDocumentsOptions::default()).await; let expected_response = json!({ "message": "Index `test` not found.", @@ -115,9 +106,7 @@ async fn get_no_document() { index.wait_task(0).await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert!(response["results"].as_array().unwrap().is_empty()); } @@ -128,9 +117,7 @@ async fn get_all_documents_no_options() { let index = server.index("test"); index.load_test_set().await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); let arr = response["results"].as_array().unwrap(); assert_eq!(arr.len(), 20); @@ -155,6 +142,32 @@ async fn get_all_documents_no_options() { assert_eq!(first, arr[0]); } +#[actix_rt::test] +async fn get_all_documents_no_options_with_response_compression() { + let server = Server::new().await; + let index_uid = "test"; + let index = server.index(index_uid); + index.load_test_set().await; + + let app = server.init_web_app().await; + let req = test::TestRequest::get() + .uri(&format!("/indexes/{}/documents?", urlencode(index_uid))) + .insert_header((ACCEPT_ENCODING, "gzip")) + .to_request(); + + let res = test::call_service(&app, req).await; + + assert_eq!(res.status(), 200); + + let bytes = test::read_body(res).await; + let decoded = Encoder::Gzip.decode(bytes); + let parsed_response = + serde_json::from_slice::(decoded.into().as_ref()).expect("Expecting valid json"); + + let arr = parsed_response["results"].as_array().unwrap(); + assert_eq!(arr.len(), 20); +} + #[actix_rt::test] async fn test_get_all_documents_limit() { let server = Server::new().await; @@ -162,10 +175,7 @@ async fn test_get_all_documents_limit() { index.load_test_set().await; let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - limit: Some(5), - ..Default::default() - }) + .get_all_documents(GetAllDocumentsOptions { limit: Some(5), ..Default::default() }) .await; 
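The response-compression test above asks for `gzip` via `ACCEPT_ENCODING` and then unpacks the body with the decode side of the encoder helper, which this hunk does not include. A minimal sketch of that decode step, assuming `flate2` for gzip and a plain `serde_json::Value` target (both assumptions):

```rust
use std::io::Read;

use flate2::read::GzDecoder;
use serde_json::Value;

/// Decompress a gzip-encoded HTTP response body and parse it as JSON.
fn decode_gzip_json(bytes: impl AsRef<[u8]>) -> Value {
    let mut decoder = GzDecoder::new(bytes.as_ref());
    let mut decoded = Vec::new();
    decoder.read_to_end(&mut decoded).expect("invalid gzip stream");
    serde_json::from_slice::<Value>(&decoded).expect("Expecting valid json")
}
```

The same pattern with a brotli decompressor would back the brotli-encoded response assertion in the `create_index` tests later in this diff.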
assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 5); @@ -182,10 +192,7 @@ async fn test_get_all_documents_offset() { index.load_test_set().await; let (response, code) = index - .get_all_documents(GetAllDocumentsOptions { - offset: Some(5), - ..Default::default() - }) + .get_all_documents(GetAllDocumentsOptions { offset: Some(5), ..Default::default() }) .await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 20); @@ -308,24 +315,12 @@ async fn get_document_s_nested_attributes_to_retrieve() { assert_eq!(code, 202); index.wait_task(1).await; - let (response, code) = index - .get_document( - 0, - Some(GetDocumentOptions { - fields: Some(vec!["content"]), - }), - ) - .await; + let (response, code) = + index.get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await; assert_eq!(code, 200); assert_eq!(response, json!({})); - let (response, code) = index - .get_document( - 1, - Some(GetDocumentOptions { - fields: Some(vec!["content"]), - }), - ) - .await; + let (response, code) = + index.get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content"]) })).await; assert_eq!(code, 200); assert_eq!( response, @@ -338,12 +333,7 @@ async fn get_document_s_nested_attributes_to_retrieve() { ); let (response, code) = index - .get_document( - 0, - Some(GetDocumentOptions { - fields: Some(vec!["content.truc"]), - }), - ) + .get_document(0, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) })) .await; assert_eq!(code, 200); assert_eq!( @@ -353,12 +343,7 @@ async fn get_document_s_nested_attributes_to_retrieve() { }) ); let (response, code) = index - .get_document( - 1, - Some(GetDocumentOptions { - fields: Some(vec!["content.truc"]), - }), - ) + .get_document(1, Some(GetDocumentOptions { fields: Some(vec!["content.truc"]) })) .await; assert_eq!(code, 200); assert_eq!( @@ -375,20 +360,13 @@ async fn get_document_s_nested_attributes_to_retrieve() { async fn get_documents_displayed_attributes_is_ignored() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"displayedAttributes": ["gender"]})) - .await; + index.update_settings(json!({"displayedAttributes": ["gender"]})).await; index.load_test_set().await; - let (response, code) = index - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 20); - assert_eq!( - response["results"][0].as_object().unwrap().keys().count(), - 16 - ); + assert_eq!(response["results"][0].as_object().unwrap().keys().count(), 16); assert!(response["results"][0]["gender"] != json!(null)); assert_eq!(response["offset"], json!(0)); diff --git a/meilisearch-http/tests/documents/mod.rs b/meilisearch-http/tests/documents/mod.rs index a791a596f..794b57c3a 100644 --- a/meilisearch-http/tests/documents/mod.rs +++ b/meilisearch-http/tests/documents/mod.rs @@ -1,3 +1,4 @@ mod add_documents; mod delete_documents; mod get_documents; +mod update_documents; diff --git a/meilisearch-http/tests/documents/update_documents.rs b/meilisearch-http/tests/documents/update_documents.rs new file mode 100644 index 000000000..688605861 --- /dev/null +++ b/meilisearch-http/tests/documents/update_documents.rs @@ -0,0 +1,198 @@ +use serde_json::json; + +use crate::common::encoder::Encoder; +use crate::common::{GetAllDocumentsOptions, Server}; + +#[actix_rt::test] +async fn 
error_document_update_create_index_bad_uid() { + let server = Server::new().await; + let index = server.index("883 fj!"); + let (response, code) = index.update_documents(json!([{"id": 1}]), None).await; + + let expected_response = json!({ + "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "code": "invalid_index_uid", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_index_uid" + }); + + assert_eq!(code, 400); + assert_eq!(response, expected_response); +} + +#[actix_rt::test] +async fn document_update_with_primary_key() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = json!([ + { + "primary": 1, + "content": "foo", + } + ]); + let (_response, code) = index.update_documents(documents, Some("primary")).await; + assert_eq!(code, 202); + + index.wait_task(0).await; + + let (response, code) = index.get_task(0).await; + assert_eq!(code, 200); + assert_eq!(response["status"], "succeeded"); + assert_eq!(response["uid"], 0); + assert_eq!(response["type"], "documentAdditionOrUpdate"); + assert_eq!(response["details"]["indexedDocuments"], 1); + assert_eq!(response["details"]["receivedDocuments"], 1); + + let (response, code) = index.get().await; + assert_eq!(code, 200); + assert_eq!(response["primaryKey"], "primary"); +} + +#[actix_rt::test] +async fn update_document() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = json!([ + { + "doc_id": 1, + "content": "foo", + } + ]); + + let (_response, code) = index.add_documents(documents, None).await; + assert_eq!(code, 202); + + index.wait_task(0).await; + + let documents = json!([ + { + "doc_id": 1, + "other": "bar", + } + ]); + + let (response, code) = index.update_documents(documents, None).await; + assert_eq!(code, 202, "response: {}", response); + + index.wait_task(1).await; + + let (response, code) = index.get_task(1).await; + assert_eq!(code, 200); + assert_eq!(response["status"], "succeeded"); + + let (response, code) = index.get_document(1, None).await; + assert_eq!(code, 200); + assert_eq!(response.to_string(), r##"{"doc_id":1,"content":"foo","other":"bar"}"##); +} + +#[actix_rt::test] +async fn update_document_gzip_encoded() { + let server = Server::new().await; + let index = server.index_with_encoder("test", Encoder::Gzip); + + let documents = json!([ + { + "doc_id": 1, + "content": "foo", + } + ]); + + let (_response, code) = index.add_documents(documents, None).await; + assert_eq!(code, 202); + + index.wait_task(0).await; + + let documents = json!([ + { + "doc_id": 1, + "other": "bar", + } + ]); + + let (response, code) = index.update_documents(documents, None).await; + assert_eq!(code, 202, "response: {}", response); + + index.wait_task(1).await; + + let (response, code) = index.get_task(1).await; + assert_eq!(code, 200); + assert_eq!(response["status"], "succeeded"); + + let (response, code) = index.get_document(1, None).await; + assert_eq!(code, 200); + assert_eq!(response.to_string(), r##"{"doc_id":1,"content":"foo","other":"bar"}"##); +} + +#[actix_rt::test] +async fn update_larger_dataset() { + let server = Server::new().await; + let index = server.index("test"); + let documents = serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(); + index.update_documents(documents, None).await; + index.wait_task(0).await; + let (response, code) = index.get_task(0).await; + assert_eq!(code, 200); + 
assert_eq!(response["type"], "documentAdditionOrUpdate"); + assert_eq!(response["details"]["indexedDocuments"], 77); + let (response, code) = index + .get_all_documents(GetAllDocumentsOptions { limit: Some(1000), ..Default::default() }) + .await; + assert_eq!(code, 200); + assert_eq!(response["results"].as_array().unwrap().len(), 77); +} + +#[actix_rt::test] +async fn error_update_documents_bad_document_id() { + let server = Server::new().await; + let index = server.index("test"); + index.create(Some("docid")).await; + let documents = json!([ + { + "docid": "foo & bar", + "content": "foobar" + } + ]); + index.update_documents(documents, None).await; + let response = index.wait_task(1).await; + assert_eq!(response["status"], json!("failed")); + assert_eq!( + response["error"]["message"], + json!( + r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."# + ) + ); + assert_eq!(response["error"]["code"], json!("invalid_document_id")); + assert_eq!(response["error"]["type"], json!("invalid_request")); + assert_eq!( + response["error"]["link"], + json!("https://docs.meilisearch.com/errors#invalid_document_id") + ); +} + +#[actix_rt::test] +async fn error_update_documents_missing_document_id() { + let server = Server::new().await; + let index = server.index("test"); + index.create(Some("docid")).await; + let documents = json!([ + { + "id": "11", + "content": "foobar" + } + ]); + index.update_documents(documents, None).await; + let response = index.wait_task(1).await; + assert_eq!(response["status"], "failed"); + assert_eq!( + response["error"]["message"], + r#"Document doesn't have a `docid` attribute: `{"id":"11","content":"foobar"}`."# + ); + assert_eq!(response["error"]["code"], "missing_document_id"); + assert_eq!(response["error"]["type"], "invalid_request"); + assert_eq!( + response["error"]["link"], + "https://docs.meilisearch.com/errors#missing_document_id" + ); +} diff --git a/meilisearch-http/tests/dumps/mod.rs b/meilisearch-http/tests/dumps/mod.rs index 389f6b480..cd9ba3828 100644 --- a/meilisearch-http/tests/dumps/mod.rs +++ b/meilisearch-http/tests/dumps/mod.rs @@ -1,10 +1,10 @@ mod data; -use crate::common::{default_settings, GetAllDocumentsOptions, Server}; use meilisearch_http::Opt; use serde_json::json; use self::data::GetDump; +use crate::common::{default_settings, GetAllDocumentsOptions, Server}; // all the following test are ignored on windows. See #2364 #[actix_rt::test] @@ -17,28 +17,19 @@ async fn import_dump_v1() { GetDump::MoviesWithSettingsV1.path(), GetDump::RubyGemsWithSettingsV1.path(), ] { - let options = Opt { - import_dump: Some(path), - ..default_settings(temp.path()) - }; - let error = Server::new_with_options(options) - .await - .map(|_| ()) - .unwrap_err(); + let options = Opt { import_dump: Some(path), ..default_settings(temp.path()) }; + let error = Server::new_with_options(options).await.map(drop).unwrap_err(); assert_eq!(error.to_string(), "The version 1 of the dumps is not supported anymore. 
You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards."); } } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v2_movie_raw() { let temp = tempfile::tempdir().unwrap(); - let options = Opt { - import_dump: Some(GetDump::MoviesRawV2.path()), - ..default_settings(temp.path()) - }; + let options = + Opt { import_dump: Some(GetDump::MoviesRawV2.path()), ..default_settings(temp.path()) }; let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; @@ -68,7 +59,7 @@ async fn import_dump_v2_movie_raw() { assert_eq!(code, 200); assert_eq!( tasks, - json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "duration": "PT41.751156S", "enqueuedAt": "2021-09-08T08:30:30.550282Z", "startedAt": "2021-09-08T08:30:30.553012Z", "finishedAt": "2021-09-08T08:31:12.304168Z" }], "limit": 20, "from": 0, "next": null }) + json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT41.751156S", "enqueuedAt": "2021-09-08T08:30:30.550282Z", "startedAt": "2021-09-08T08:30:30.553012Z", "finishedAt": "2021-09-08T08:31:12.304168Z" }], "limit": 20, "from": 0, "next": null }) ); // finally we're just going to check that we can still get a few documents by id @@ -95,7 +86,6 @@ async fn import_dump_v2_movie_raw() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v2_movie_with_settings() { let temp = tempfile::tempdir().unwrap(); @@ -132,7 +122,7 @@ async fn import_dump_v2_movie_with_settings() { assert_eq!(code, 200); assert_eq!( tasks, - json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "details": { "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "duration": "PT37.488777S", "enqueuedAt": "2021-09-08T08:24:02.323444Z", "startedAt": "2021-09-08T08:24:02.324145Z", "finishedAt": "2021-09-08T08:24:39.812922Z" }, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "duration": "PT39.941318S", "enqueuedAt": "2021-09-08T08:21:14.742672Z", "startedAt": "2021-09-08T08:21:14.750166Z", "finishedAt": "2021-09-08T08:21:54.691484Z" }], "limit": 20, "from": 1, "next": null }) + json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT37.488777S", "enqueuedAt": "2021-09-08T08:24:02.323444Z", "startedAt": "2021-09-08T08:24:02.324145Z", "finishedAt": "2021-09-08T08:24:39.812922Z" }, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT39.941318S", "enqueuedAt": "2021-09-08T08:21:14.742672Z", "startedAt": 
"2021-09-08T08:21:14.750166Z", "finishedAt": "2021-09-08T08:21:54.691484Z" }], "limit": 20, "from": 1, "next": null }) ); // finally we're just going to check that we can still get a few documents by id @@ -159,7 +149,6 @@ async fn import_dump_v2_movie_with_settings() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v2_rubygems_with_settings() { let temp = tempfile::tempdir().unwrap(); @@ -196,7 +185,7 @@ async fn import_dump_v2_rubygems_with_settings() { assert_eq!(code, 200); assert_eq!( tasks["results"][0], - json!({"uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "duration": "PT14.034672S", "enqueuedAt": "2021-09-08T08:40:31.390775Z", "startedAt": "2021-09-08T08:51:39.060642Z", "finishedAt": "2021-09-08T08:51:53.095314Z"}) + json!({"uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "error": null, "duration": "PT14.034672S", "enqueuedAt": "2021-09-08T08:40:31.390775Z", "startedAt": "2021-09-08T08:51:39.060642Z", "finishedAt": "2021-09-08T08:51:53.095314Z"}) ); // finally we're just going to check that we can still get a few documents by id @@ -223,14 +212,11 @@ async fn import_dump_v2_rubygems_with_settings() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v3_movie_raw() { let temp = tempfile::tempdir().unwrap(); - let options = Opt { - import_dump: Some(GetDump::MoviesRawV3.path()), - ..default_settings(temp.path()) - }; + let options = + Opt { import_dump: Some(GetDump::MoviesRawV3.path()), ..default_settings(temp.path()) }; let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; @@ -260,7 +246,7 @@ async fn import_dump_v3_movie_raw() { assert_eq!(code, 200); assert_eq!( tasks, - json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "duration": "PT41.751156S", "enqueuedAt": "2021-09-08T08:30:30.550282Z", "startedAt": "2021-09-08T08:30:30.553012Z", "finishedAt": "2021-09-08T08:31:12.304168Z" }], "limit": 20, "from": 0, "next": null }) + json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT41.751156S", "enqueuedAt": "2021-09-08T08:30:30.550282Z", "startedAt": "2021-09-08T08:30:30.553012Z", "finishedAt": "2021-09-08T08:31:12.304168Z" }], "limit": 20, "from": 0, "next": null }) ); // finally we're just going to check that we can still get a few documents by id @@ -287,7 +273,6 @@ async fn import_dump_v3_movie_raw() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v3_movie_with_settings() { let temp = tempfile::tempdir().unwrap(); @@ -324,7 +309,7 @@ async fn import_dump_v3_movie_with_settings() { assert_eq!(code, 200); assert_eq!( tasks, - json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "details": { "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "duration": "PT37.488777S", 
"enqueuedAt": "2021-09-08T08:24:02.323444Z", "startedAt": "2021-09-08T08:24:02.324145Z", "finishedAt": "2021-09-08T08:24:39.812922Z" }, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "duration": "PT39.941318S", "enqueuedAt": "2021-09-08T08:21:14.742672Z", "startedAt": "2021-09-08T08:21:14.750166Z", "finishedAt": "2021-09-08T08:21:54.691484Z" }], "limit": 20, "from": 1, "next": null }) + json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT37.488777S", "enqueuedAt": "2021-09-08T08:24:02.323444Z", "startedAt": "2021-09-08T08:24:02.324145Z", "finishedAt": "2021-09-08T08:24:39.812922Z" }, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT39.941318S", "enqueuedAt": "2021-09-08T08:21:14.742672Z", "startedAt": "2021-09-08T08:21:14.750166Z", "finishedAt": "2021-09-08T08:21:54.691484Z" }], "limit": 20, "from": 1, "next": null }) ); // finally we're just going to check that we can["results"] still get a few documents by id @@ -351,7 +336,6 @@ async fn import_dump_v3_movie_with_settings() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v3_rubygems_with_settings() { let temp = tempfile::tempdir().unwrap(); @@ -388,7 +372,7 @@ async fn import_dump_v3_rubygems_with_settings() { assert_eq!(code, 200); assert_eq!( tasks["results"][0], - json!({"uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "duration": "PT14.034672S", "enqueuedAt": "2021-09-08T08:40:31.390775Z", "startedAt": "2021-09-08T08:51:39.060642Z", "finishedAt": "2021-09-08T08:51:53.095314Z"}) + json!({"uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "error": null, "duration": "PT14.034672S", "enqueuedAt": "2021-09-08T08:40:31.390775Z", "startedAt": "2021-09-08T08:51:39.060642Z", "finishedAt": "2021-09-08T08:51:53.095314Z"}) ); // finally we're just going to check that we can still get a few documents by id @@ -415,14 +399,11 @@ async fn import_dump_v3_rubygems_with_settings() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v4_movie_raw() { let temp = tempfile::tempdir().unwrap(); - let options = Opt { - import_dump: Some(GetDump::MoviesRawV4.path()), - ..default_settings(temp.path()) - }; + let options = + Opt { import_dump: Some(GetDump::MoviesRawV4.path()), ..default_settings(temp.path()) }; let server = Server::new_with_options(options).await.unwrap(); let (indexes, code) = server.list_indexes(None, None).await; @@ -452,7 +433,7 @@ async fn import_dump_v4_movie_raw() { assert_eq!(code, 200); assert_eq!( tasks, - json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "duration": "PT41.751156S", "enqueuedAt": "2021-09-08T08:30:30.550282Z", 
"startedAt": "2021-09-08T08:30:30.553012Z", "finishedAt": "2021-09-08T08:31:12.304168Z" }], "limit" : 20, "from": 0, "next": null }) + json!({ "results": [{"uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT41.751156S", "enqueuedAt": "2021-09-08T08:30:30.550282Z", "startedAt": "2021-09-08T08:30:30.553012Z", "finishedAt": "2021-09-08T08:31:12.304168Z" }], "limit" : 20, "from": 0, "next": null }) ); // finally we're just going to check that we can still get a few documents by id @@ -479,7 +460,6 @@ async fn import_dump_v4_movie_raw() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v4_movie_with_settings() { let temp = tempfile::tempdir().unwrap(); @@ -516,7 +496,7 @@ async fn import_dump_v4_movie_with_settings() { assert_eq!(code, 200); assert_eq!( tasks, - json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "details": { "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "duration": "PT37.488777S", "enqueuedAt": "2021-09-08T08:24:02.323444Z", "startedAt": "2021-09-08T08:24:02.324145Z", "finishedAt": "2021-09-08T08:24:39.812922Z" }, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "duration": "PT39.941318S", "enqueuedAt": "2021-09-08T08:21:14.742672Z", "startedAt": "2021-09-08T08:21:14.750166Z", "finishedAt": "2021-09-08T08:21:54.691484Z" }], "limit": 20, "from": 1, "next": null }) + json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT37.488777S", "enqueuedAt": "2021-09-08T08:24:02.323444Z", "startedAt": "2021-09-08T08:24:02.324145Z", "finishedAt": "2021-09-08T08:24:39.812922Z" }, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31944 }, "error": null, "duration": "PT39.941318S", "enqueuedAt": "2021-09-08T08:21:14.742672Z", "startedAt": "2021-09-08T08:21:14.750166Z", "finishedAt": "2021-09-08T08:21:54.691484Z" }], "limit": 20, "from": 1, "next": null }) ); // finally we're just going to check that we can still get a few documents by id @@ -543,7 +523,6 @@ async fn import_dump_v4_movie_with_settings() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v4_rubygems_with_settings() { let temp = tempfile::tempdir().unwrap(); @@ -580,7 +559,7 @@ async fn import_dump_v4_rubygems_with_settings() { assert_eq!(code, 200); assert_eq!( tasks["results"][0], - json!({ "uid": 92, "indexUid": "rubygems", "status": "succeeded", "type": "documentAdditionOrUpdate", "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "duration": "PT14.034672S", "enqueuedAt": "2021-09-08T08:40:31.390775Z", "startedAt": "2021-09-08T08:51:39.060642Z", "finishedAt": "2021-09-08T08:51:53.095314Z"}) + json!({ "uid": 92, "indexUid": "rubygems", "status": "succeeded", 
"type": "documentAdditionOrUpdate", "canceledBy": null, "details": {"receivedDocuments": 0, "indexedDocuments": 1042}, "error": null, "duration": "PT14.034672S", "enqueuedAt": "2021-09-08T08:40:31.390775Z", "startedAt": "2021-09-08T08:51:39.060642Z", "finishedAt": "2021-09-08T08:51:53.095314Z"}) ); // finally we're just going to check that we can still get a few documents by id @@ -607,14 +586,11 @@ async fn import_dump_v4_rubygems_with_settings() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn import_dump_v5() { let temp = tempfile::tempdir().unwrap(); - let options = Opt { - import_dump: Some(GetDump::TestV5.path()), - ..default_settings(temp.path()) - }; + let options = + Opt { import_dump: Some(GetDump::TestV5.path()), ..default_settings(temp.path()) }; let mut server = Server::new_auth_with_options(options, temp).await; server.use_api_key("MASTER_KEY"); @@ -654,14 +630,10 @@ async fn import_dump_v5() { assert_eq!(code, 200); assert_eq!(stats, expected_stats); - let (docs, code) = index2 - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(docs["results"].as_array().unwrap().len(), 10); - let (docs, code) = index1 - .get_all_documents(GetAllDocumentsOptions::default()) - .await; + let (docs, code) = index1.get_all_documents(GetAllDocumentsOptions::default()).await; assert_eq!(code, 200); assert_eq!(docs["results"].as_array().unwrap().len(), 10); diff --git a/meilisearch-http/tests/index/create_index.rs b/meilisearch-http/tests/index/create_index.rs index a1c508e1f..8e01a8113 100644 --- a/meilisearch-http/tests/index/create_index.rs +++ b/meilisearch-http/tests/index/create_index.rs @@ -1,6 +1,11 @@ -use crate::common::Server; +use actix_web::http::header::ContentType; +use actix_web::test; +use http::header::ACCEPT_ENCODING; use serde_json::{json, Value}; +use crate::common::encoder::Encoder; +use crate::common::Server; + #[actix_rt::test] async fn create_index_no_primary_key() { let server = Server::new().await; @@ -18,6 +23,88 @@ async fn create_index_no_primary_key() { assert_eq!(response["details"]["primaryKey"], Value::Null); } +#[actix_rt::test] +async fn create_index_with_gzip_encoded_request() { + let server = Server::new().await; + let index = server.index_with_encoder("test", Encoder::Gzip); + let (response, code) = index.create(None).await; + + assert_eq!(code, 202); + + assert_eq!(response["status"], "enqueued"); + + let response = index.wait_task(0).await; + + assert_eq!(response["status"], "succeeded"); + assert_eq!(response["type"], "indexCreation"); + assert_eq!(response["details"]["primaryKey"], Value::Null); +} + +#[actix_rt::test] +async fn create_index_with_gzip_encoded_request_and_receiving_brotli_encoded_response() { + let server = Server::new().await; + let app = server.init_web_app().await; + + let body = serde_json::to_string(&json!({ + "uid": "test", + "primaryKey": None::<&str>, + })) + .unwrap(); + let req = test::TestRequest::post() + .uri("/indexes") + .insert_header(Encoder::Gzip.header().unwrap()) + .insert_header((ACCEPT_ENCODING, "br")) + .insert_header(ContentType::json()) + .set_payload(Encoder::Gzip.encode(body)) + .to_request(); + + let res = test::call_service(&app, req).await; + + assert_eq!(res.status(), 202); + + let bytes = test::read_body(res).await; + let decoded = Encoder::Brotli.decode(bytes); + let parsed_response = + 
serde_json::from_slice::<Value>(decoded.into().as_ref()).expect("Expecting valid json"); + + assert_eq!(parsed_response["taskUid"], 0); + assert_eq!(parsed_response["indexUid"], "test"); +} + +#[actix_rt::test] +async fn create_index_with_zlib_encoded_request() { + let server = Server::new().await; + let index = server.index_with_encoder("test", Encoder::Deflate); + let (response, code) = index.create(None).await; + + assert_eq!(code, 202); + + assert_eq!(response["status"], "enqueued"); + + let response = index.wait_task(0).await; + + assert_eq!(response["status"], "succeeded"); + assert_eq!(response["type"], "indexCreation"); + assert_eq!(response["details"]["primaryKey"], Value::Null); +} + +#[actix_rt::test] +async fn create_index_with_brotli_encoded_request() { + let server = Server::new().await; + let index = server.index_with_encoder("test", Encoder::Brotli); + let (response, code) = index.create(None).await; + + assert_eq!(code, 202); + + assert_eq!(response["status"], "enqueued"); + + let response = index.wait_task(0).await; + + assert_eq!(response["status"], "succeeded"); + assert_eq!(response["type"], "indexCreation"); + assert_eq!(response["details"]["primaryKey"], Value::Null); +} + #[actix_rt::test] async fn create_index_with_primary_key() { let server = Server::new().await; @@ -102,7 +189,7 @@ async fn error_create_with_invalid_index_uid() { let (response, code) = index.create(None).await; let expected_response = json!({ - "message": "invalid index uid `test test#!`, the uid must be an integer or a string containing only alphanumeric characters a-z A-Z 0-9, hyphens - and underscores _.", + "message": "`test test#!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch-http/tests/index/delete_index.rs b/meilisearch-http/tests/index/delete_index.rs index f3cdf6631..b6efc7a68 100644 --- a/meilisearch-http/tests/index/delete_index.rs +++ b/meilisearch-http/tests/index/delete_index.rs @@ -44,7 +44,6 @@ async fn error_delete_unexisting_index() { } #[actix_rt::test] -#[cfg_attr(target_os = "windows", ignore)] async fn loop_delete_add_documents() { let server = Server::new().await; let index = server.index("test"); diff --git a/meilisearch-http/tests/index/get_index.rs b/meilisearch-http/tests/index/get_index.rs index 91cb1a6d5..3d3ba4b44 100644 --- a/meilisearch-http/tests/index/get_index.rs +++ b/meilisearch-http/tests/index/get_index.rs @@ -1,6 +1,6 @@ +use serde_json::{json, Value}; + use crate::common::Server; -use serde_json::json; -use serde_json::Value; #[actix_rt::test] async fn create_and_get_index() { @@ -63,12 +63,8 @@ async fn list_multiple_indexes() { assert!(response["results"].is_array()); let arr = response["results"].as_array().unwrap(); assert_eq!(arr.len(), 2); - assert!(arr - .iter() - .any(|entry| entry["uid"] == "test" && entry["primaryKey"] == Value::Null)); - assert!(arr - .iter() - .any(|entry| entry["uid"] == "test1" && entry["primaryKey"] == "key")); + assert!(arr.iter().any(|entry| entry["uid"] == "test" && entry["primaryKey"] == Value::Null)); + assert!(arr.iter().any(|entry| entry["uid"] == "test1" && entry["primaryKey"] == "key")); } #[actix_rt::test] @@ -77,10 +73,7 @@ async fn get_and_paginate_indexes() { let server = Server::new().await; const NB_INDEXES: usize = 50; for i in 0..NB_INDEXES { server.index(&format!("test_{i:02}")).create(None).await; - server
.index(&format!("test_{i:02}")) - .wait_task(i as u64) - .await; + server.index(&format!("test_{i:02}")).wait_task(i as u64).await; } // basic diff --git a/meilisearch-http/tests/index/stats.rs b/meilisearch-http/tests/index/stats.rs index f55998998..813f05b4a 100644 --- a/meilisearch-http/tests/index/stats.rs +++ b/meilisearch-http/tests/index/stats.rs @@ -17,10 +17,7 @@ async fn stats() { assert_eq!(code, 200); assert_eq!(response["numberOfDocuments"], 0); assert!(response["isIndexing"] == false); - assert!(response["fieldDistribution"] - .as_object() - .unwrap() - .is_empty()); + assert!(response["fieldDistribution"].as_object().unwrap().is_empty()); let documents = json!([ { diff --git a/meilisearch-http/tests/index/update_index.rs b/meilisearch-http/tests/index/update_index.rs index 48fde5608..3c283407c 100644 --- a/meilisearch-http/tests/index/update_index.rs +++ b/meilisearch-http/tests/index/update_index.rs @@ -1,6 +1,9 @@ -use crate::common::Server; use serde_json::json; -use time::{format_description::well_known::Rfc3339, OffsetDateTime}; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; + +use crate::common::encoder::Encoder; +use crate::common::Server; #[actix_rt::test] async fn update_primary_key() { @@ -34,6 +37,22 @@ async fn update_primary_key() { assert_eq!(response.as_object().unwrap().len(), 4); } +#[actix_rt::test] +async fn create_and_update_with_different_encoding() { + let server = Server::new().await; + let index = server.index_with_encoder("test", Encoder::Gzip); + let (_, code) = index.create(None).await; + + assert_eq!(code, 202); + + let index = server.index_with_encoder("test", Encoder::Brotli); + index.update(Some("primary")).await; + + let response = index.wait_task(1).await; + + assert_eq!(response["status"], "succeeded"); +} + #[actix_rt::test] async fn update_nothing() { let server = Server::new().await; diff --git a/meilisearch-http/tests/search/errors.rs b/meilisearch-http/tests/search/errors.rs index 6b5569b58..add305083 100644 --- a/meilisearch-http/tests/search/errors.rs +++ b/meilisearch-http/tests/search/errors.rs @@ -1,7 +1,7 @@ -use crate::common::Server; use serde_json::json; use super::DOCUMENTS; +use crate::common::Server; #[actix_rt::test] async fn search_unexisting_index() { @@ -45,16 +45,14 @@ async fn search_invalid_highlight_and_crop_tags() { for field in fields { // object - let (response, code) = index - .search_post(json!({field.to_string(): {"marker": ""}})) - .await; + let (response, code) = + index.search_post(json!({field.to_string(): {"marker": ""}})).await; assert_eq!(code, 400, "field {} passing object: {}", &field, response); assert_eq!(response["code"], "bad_request"); // array - let (response, code) = index - .search_post(json!({field.to_string(): ["marker", ""]})) - .await; + let (response, code) = + index.search_post(json!({field.to_string(): ["marker", ""]})).await; assert_eq!(code, 400, "field {} passing array: {}", &field, response); assert_eq!(response["code"], "bad_request"); } @@ -65,16 +63,14 @@ async fn filter_invalid_syntax_object() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; index.wait_task(1).await; let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT 
EXISTS`, or `_geoRadius` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_filter" @@ -92,16 +88,14 @@ async fn filter_invalid_syntax_array() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; index.wait_task(1).await; let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_filter" @@ -119,9 +113,7 @@ async fn filter_invalid_syntax_string() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -134,13 +126,10 @@ async fn filter_invalid_syntax_string() { "link": "https://docs.meilisearch.com/errors#invalid_filter" }); index - .search( - json!({"filter": "title = Glass XOR title = Glass"}), - |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }, - ) + .search(json!({"filter": "title = Glass XOR title = Glass"}), |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }) .await; } @@ -149,9 +138,7 @@ async fn filter_invalid_attribute_array() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -176,9 +163,7 @@ async fn filter_invalid_attribute_string() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -203,9 +188,7 @@ async fn filter_reserved_geo_attribute_array() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -230,9 +213,7 @@ async fn filter_reserved_geo_attribute_string() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ 
-257,9 +238,7 @@ async fn filter_reserved_attribute_array() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -272,13 +251,10 @@ async fn filter_reserved_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_filter" }); index - .search( - json!({"filter": ["_geoDistance = Glass"]}), - |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }, - ) + .search(json!({"filter": ["_geoDistance = Glass"]}), |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }) .await; } @@ -287,9 +263,7 @@ async fn filter_reserved_attribute_string() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -302,13 +276,10 @@ async fn filter_reserved_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_filter" }); index - .search( - json!({"filter": "_geoDistance = Glass"}), - |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }, - ) + .search(json!({"filter": "_geoDistance = Glass"}), |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }) .await; } @@ -317,9 +288,7 @@ async fn sort_geo_reserved_attribute() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -349,9 +318,7 @@ async fn sort_reserved_attribute() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -381,9 +348,7 @@ async fn sort_unsortable_attribute() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -413,9 +378,7 @@ async fn sort_invalid_syntax() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; diff --git a/meilisearch-http/tests/search/formatted.rs b/meilisearch-http/tests/search/formatted.rs index 7303a7154..191720602 100644 --- a/meilisearch-http/tests/search/formatted.rs +++ b/meilisearch-http/tests/search/formatted.rs @@ -1,15 +1,14 @@ +use serde_json::json; + use super::*; use crate::common::Server; -use serde_json::json; #[actix_rt::test] async fn formatted_contain_wildcard() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({ 
"displayedAttributes": ["id", "cattos"] })) - .await; + index.update_settings(json!({ "displayedAttributes": ["id", "cattos"] })).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -34,19 +33,16 @@ async fn formatted_contain_wildcard() { .await; index - .search( - json!({ "q": "pesti", "attributesToRetrieve": ["*"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "cattos": "pesti", - }) - ); - }, - ) + .search(json!({ "q": "pesti", "attributesToRetrieve": ["*"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + }) + ); + }) .await; index @@ -91,23 +87,20 @@ async fn formatted_contain_wildcard() { .await; index - .search( - json!({ "q": "pesti", "attributesToCrop": ["*"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, + .search(json!({ "q": "pesti", "attributesToCrop": ["*"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "cattos": "pesti", + "_formatted": { + "id": "852", "cattos": "pesti", - "_formatted": { - "id": "852", - "cattos": "pesti", - } - }) - ); - }, - ) + } + }) + ); + }) .await; } @@ -121,27 +114,24 @@ async fn format_nested() { index.wait_task(0).await; index - .search( - json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "doggos": [ - { - "name": "bobby", - "age": 2, - }, - { - "name": "buddy", - "age": 4, - }, - ], - }) - ); - }, - ) + .search(json!({ "q": "pesti", "attributesToRetrieve": ["doggos"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "doggos": [ + { + "name": "bobby", + "age": 2, + }, + { + "name": "buddy", + "age": 4, + }, + ], + }) + ); + }) .await; index @@ -297,9 +287,7 @@ async fn displayedattr_2_smol() { let index = server.index("test"); // not enough displayed for the other settings - index - .update_settings(json!({ "displayedAttributes": ["id"] })) - .await; + index.update_settings(json!({ "displayedAttributes": ["id"] })).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -319,36 +307,30 @@ async fn displayedattr_2_smol() { .await; index - .search( - json!({ "attributesToRetrieve": ["id"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); - }, - ) + .search(json!({ "attributesToRetrieve": ["id"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + }) .await; index - .search( - json!({ "attributesToHighlight": ["id"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - "_formatted": { - "id": "852", - } - }) - ); - }, - ) + .search(json!({ "attributesToHighlight": ["id"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + "_formatted": { + "id": "852", + } + }) + ); + }) .await; index @@ -385,43 +367,34 @@ async fn displayedattr_2_smol() { .await; index - .search( - json!({ "attributesToHighlight": ["cattos"] }), - |response, code| { - 
assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); - }, - ) + .search(json!({ "attributesToHighlight": ["cattos"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + }) .await; index - .search( - json!({ "attributesToCrop": ["cattos"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!( - response["hits"][0], - json!({ - "id": 852, - }) - ); - }, - ) + .search(json!({ "attributesToCrop": ["cattos"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!( + response["hits"][0], + json!({ + "id": 852, + }) + ); + }) .await; index - .search( - json!({ "attributesToRetrieve": ["cattos"] }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"][0], json!({})); - }, - ) + .search(json!({ "attributesToRetrieve": ["cattos"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"][0], json!({})); + }) .await; index diff --git a/meilisearch-http/tests/search/mod.rs b/meilisearch-http/tests/search/mod.rs index d5e916860..44a4702d0 100644 --- a/meilisearch-http/tests/search/mod.rs +++ b/meilisearch-http/tests/search/mod.rs @@ -3,11 +3,13 @@ mod errors; mod formatted; +mod pagination; -use crate::common::Server; use once_cell::sync::Lazy; use serde_json::{json, Value}; +use crate::common::Server; + pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| { json!([ { @@ -198,9 +200,7 @@ async fn search_with_filter_string_notation() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -220,9 +220,7 @@ async fn search_with_filter_string_notation() { let index = server.index("nested"); - index - .update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -261,9 +259,7 @@ async fn search_with_filter_array_notation() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -291,9 +287,7 @@ async fn search_with_sort_on_numbers() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -313,9 +307,7 @@ async fn search_with_sort_on_numbers() { let index = server.index("nested"); - index - .update_settings(json!({"sortableAttributes": ["doggos.age"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["doggos.age"]})).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -339,9 +331,7 @@ async fn search_with_sort_on_strings() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["title"]})) - .await;
+ index.update_settings(json!({"sortableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -361,9 +351,7 @@ async fn search_with_sort_on_strings() { let index = server.index("nested"); - index - .update_settings(json!({"sortableAttributes": ["doggos.name"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["doggos.name"]})).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -387,9 +375,7 @@ async fn search_with_multiple_sort() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"sortableAttributes": ["id", "title"]})) - .await; + index.update_settings(json!({"sortableAttributes": ["id", "title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -409,9 +395,7 @@ async fn search_facet_distribution() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({"filterableAttributes": ["title"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -433,9 +417,7 @@ async fn search_facet_distribution() { let index = server.index("nested"); - index - .update_settings(json!({"filterableAttributes": ["father", "doggos.name"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["father", "doggos.name"]})).await; let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, None).await; @@ -466,9 +448,7 @@ async fn search_facet_distribution() { ) .await; - index - .update_settings(json!({"filterableAttributes": ["doggos"]})) - .await; + index.update_settings(json!({"filterableAttributes": ["doggos"]})).await; index.wait_task(4).await; index @@ -501,10 +481,7 @@ async fn search_facet_distribution() { dist["doggos.name"], json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1}) ); - assert_eq!( - dist["doggos.age"], - json!({ "2": 1, "4": 1, "5": 1, "6": 1, "8": 1}) - ); + assert_eq!(dist["doggos.age"], json!({ "2": 1, "4": 1, "5": 1, "6": 1, "8": 1})); }, ) .await; @@ -515,17 +492,14 @@ async fn displayed_attributes() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({ "displayedAttributes": ["title"] })) - .await; + index.update_settings(json!({ "displayedAttributes": ["title"] })).await; let documents = DOCUMENTS.clone(); index.add_documents(documents, None).await; index.wait_task(1).await; - let (response, code) = index - .search_post(json!({ "attributesToRetrieve": ["title", "id"] })) - .await; + let (response, code) = + index.search_post(json!({ "attributesToRetrieve": ["title", "id"] })).await; assert_eq!(code, 200, "{}", response); assert!(response["hits"][0].get("title").is_some()); } @@ -535,9 +509,7 @@ async fn placeholder_search_is_hard_limited() { let server = Server::new().await; let index = server.index("test"); - let documents: Vec<_> = (0..1200) - .map(|i| json!({ "id": i, "text": "I am unique!" })) - .collect(); + let documents: Vec<_> = (0..1200).map(|i| json!({ "id": i, "text": "I am unique!" 
})).collect(); index.add_documents(documents.into(), None).await; index.wait_task(0).await; @@ -566,9 +538,7 @@ async fn placeholder_search_is_hard_limited() { ) .await; - index - .update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })) - .await; + index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await; index.wait_task(1).await; index @@ -602,9 +572,7 @@ async fn search_is_hard_limited() { let server = Server::new().await; let index = server.index("test"); - let documents: Vec<_> = (0..1200) - .map(|i| json!({ "id": i, "text": "I am unique!" })) - .collect(); + let documents: Vec<_> = (0..1200).map(|i| json!({ "id": i, "text": "I am unique!" })).collect(); index.add_documents(documents.into(), None).await; index.wait_task(0).await; @@ -635,9 +603,7 @@ async fn search_is_hard_limited() { ) .await; - index - .update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })) - .await; + index.update_settings(json!({ "pagination": { "maxTotalHits": 10_000 } })).await; index.wait_task(1).await; index @@ -673,13 +639,9 @@ async fn faceting_max_values_per_facet() { let server = Server::new().await; let index = server.index("test"); - index - .update_settings(json!({ "filterableAttributes": ["number"] })) - .await; + index.update_settings(json!({ "filterableAttributes": ["number"] })).await; - let documents: Vec<_> = (0..10_000) - .map(|id| json!({ "id": id, "number": id * 10 })) - .collect(); + let documents: Vec<_> = (0..10_000).map(|id| json!({ "id": id, "number": id * 10 })).collect(); index.add_documents(json!(documents), None).await; index.wait_task(1).await; @@ -696,9 +658,7 @@ async fn faceting_max_values_per_facet() { ) .await; - index - .update_settings(json!({ "faceting": { "maxValuesPerFacet": 10_000 } })) - .await; + index.update_settings(json!({ "faceting": { "maxValuesPerFacet": 10_000 } })).await; index.wait_task(2).await; index diff --git a/meilisearch-http/tests/search/pagination.rs b/meilisearch-http/tests/search/pagination.rs new file mode 100644 index 000000000..1099200b8 --- /dev/null +++ b/meilisearch-http/tests/search/pagination.rs @@ -0,0 +1,113 @@ +use serde_json::json; + +use crate::common::Server; +use crate::search::DOCUMENTS; + +#[actix_rt::test] +async fn default_search_should_return_estimated_total_hit() { + let server = Server::new().await; + let index = server.index("basic"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + index + .search(json!({}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert!(response.get("estimatedTotalHits").is_some()); + assert!(response.get("limit").is_some()); + assert!(response.get("offset").is_some()); + + // these fields shouldn't be present + assert!(response.get("totalHits").is_none()); + assert!(response.get("page").is_none()); + assert!(response.get("totalPages").is_none()); + }) + .await; +} + +#[actix_rt::test] +async fn simple_search() { + let server = Server::new().await; + let index = server.index("basic"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + index + .search(json!({"page": 1}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 5); + assert!(response.get("totalHits").is_some()); + assert_eq!(response["page"], 1); + assert_eq!(response["totalPages"], 1); + + // these fields shouldn't be present + assert!(response.get("estimatedTotalHits").is_none()); + 
assert!(response.get("limit").is_none()); + assert!(response.get("offset").is_none()); + }) + .await; +} + +#[actix_rt::test] +async fn page_zero_should_not_return_any_result() { + let server = Server::new().await; + let index = server.index("basic"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + index + .search(json!({"page": 0}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 0); + assert!(response.get("totalHits").is_some()); + assert_eq!(response["page"], 0); + assert_eq!(response["totalPages"], 1); + }) + .await; +} + +#[actix_rt::test] +async fn hits_per_page_1() { + let server = Server::new().await; + let index = server.index("basic"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + index + .search(json!({"hitsPerPage": 1}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 1); + assert_eq!(response["totalHits"], 5); + assert_eq!(response["page"], 1); + assert_eq!(response["totalPages"], 5); + }) + .await; +} + +#[actix_rt::test] +async fn hits_per_page_0_should_not_return_any_result() { + let server = Server::new().await; + let index = server.index("basic"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + index + .search(json!({"hitsPerPage": 0}), |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 0); + assert_eq!(response["totalHits"], 5); + assert_eq!(response["page"], 1); + assert_eq!(response["totalPages"], 0); + }) + .await; +} diff --git a/meilisearch-http/tests/settings/distinct.rs b/meilisearch-http/tests/settings/distinct.rs index d2dd0f74f..a60792329 100644 --- a/meilisearch-http/tests/settings/distinct.rs +++ b/meilisearch-http/tests/settings/distinct.rs @@ -1,23 +1,20 @@ -use crate::common::Server; use serde_json::json; +use crate::common::Server; + #[actix_rt::test] async fn set_and_reset_distinct_attribute() { let server = Server::new().await; let index = server.index("test"); - let (_response, _code) = index - .update_settings(json!({ "distinctAttribute": "test"})) - .await; + let (_response, _code) = index.update_settings(json!({ "distinctAttribute": "test"})).await; index.wait_task(0).await; let (response, _) = index.settings().await; assert_eq!(response["distinctAttribute"], "test"); - index - .update_settings(json!({ "distinctAttribute": null })) - .await; + index.update_settings(json!({ "distinctAttribute": null })).await; index.wait_task(1).await; diff --git a/meilisearch-http/tests/settings/get_settings.rs b/meilisearch-http/tests/settings/get_settings.rs index 9d10b7820..a3c667047 100644 --- a/meilisearch-http/tests/settings/get_settings.rs +++ b/meilisearch-http/tests/settings/get_settings.rs @@ -13,14 +13,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(| map.insert("distinct_attribute", json!(Value::Null)); map.insert( "ranking_rules", - json!([ - "words", - "typo", - "proximity", - "attribute", - "sort", - "exactness" - ]), + json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]), ); map.insert("stop_words", json!([])); map.insert("synonyms", json!({})); @@ -63,14 +56,7 @@ async fn get_settings() { assert_eq!(settings["distinctAttribute"], json!(null)); assert_eq!( settings["rankingRules"], - json!([ - "words", - "typo", -
"proximity", - "attribute", - "sort", - "exactness" - ]) + json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]) ); assert_eq!(settings["stopWords"], json!([])); assert_eq!( @@ -99,18 +85,14 @@ async fn error_update_settings_unknown_field() { async fn test_partial_update() { let server = Server::new().await; let index = server.index("test"); - let (_response, _code) = index - .update_settings(json!({"displayedAttributes": ["foo"]})) - .await; + let (_response, _code) = index.update_settings(json!({"displayedAttributes": ["foo"]})).await; index.wait_task(0).await; let (response, code) = index.settings().await; assert_eq!(code, 200); assert_eq!(response["displayedAttributes"], json!(["foo"])); assert_eq!(response["searchableAttributes"], json!(["*"])); - let (_response, _) = index - .update_settings(json!({"searchableAttributes": ["bar"]})) - .await; + let (_response, _) = index.update_settings(json!({"searchableAttributes": ["bar"]})).await; index.wait_task(1).await; let (response, code) = index.settings().await; @@ -158,10 +140,7 @@ async fn reset_all_settings() { assert_eq!(response["displayedAttributes"], json!(["name", "age"])); assert_eq!(response["searchableAttributes"], json!(["name"])); assert_eq!(response["stopWords"], json!(["the"])); - assert_eq!( - response["synonyms"], - json!({"puppy": ["dog", "doggo", "potat"] }) - ); + assert_eq!(response["synonyms"], json!({"puppy": ["dog", "doggo", "potat"] })); assert_eq!(response["filterableAttributes"], json!(["age"])); index.delete_settings().await; @@ -203,7 +182,7 @@ async fn error_update_setting_unexisting_index_invalid_uid() { assert_eq!(code, 400); let expected = json!({ - "message": "invalid index uid `test##! `, the uid must be an integer or a string containing only alphanumeric characters a-z A-Z 0-9, hyphens - and underscores _.", + "message": "`test##! ` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid"}); @@ -299,9 +278,8 @@ async fn error_set_invalid_ranking_rules() { let index = server.index("test"); index.create(None).await; - let (_response, _code) = index - .update_settings(json!({ "rankingRules": [ "manyTheFish"]})) - .await; + let (_response, _code) = + index.update_settings(json!({ "rankingRules": [ "manyTheFish"]})).await; index.wait_task(1).await; let (response, code) = index.get_task(1).await; diff --git a/meilisearch-http/tests/snapshot/mod.rs b/meilisearch-http/tests/snapshot/mod.rs index 27ff838e1..1c2e33534 100644 --- a/meilisearch-http/tests/snapshot/mod.rs +++ b/meilisearch-http/tests/snapshot/mod.rs @@ -1,11 +1,10 @@ use std::time::Duration; -use crate::common::server::default_settings; -use crate::common::GetAllDocumentsOptions; -use crate::common::Server; +use meilisearch_http::Opt; use tokio::time::sleep; -use meilisearch_http::Opt; +use crate::common::server::default_settings; +use crate::common::{GetAllDocumentsOptions, Server}; macro_rules! verify_snapshot { ( @@ -30,6 +29,7 @@ macro_rules! 
verify_snapshot { } #[actix_rt::test] +#[ignore] // TODO: unignore async fn perform_snapshot() { let temp = tempfile::tempdir().unwrap(); let snapshot_dir = tempfile::tempdir().unwrap(); @@ -62,10 +62,7 @@ async fn perform_snapshot() { let snapshot_path = snapshot_dir.path().to_owned().join("db.snapshot"); - let options = Opt { - import_snapshot: Some(snapshot_path), - ..default_settings(temp.path()) - }; + let options = Opt { import_snapshot: Some(snapshot_path), ..default_settings(temp.path()) }; let snapshot_server = Server::new_with_options(options).await.unwrap(); diff --git a/meilisearch-http/tests/stats/mod.rs b/meilisearch-http/tests/stats/mod.rs index 0629c2e29..152e4f625 100644 --- a/meilisearch-http/tests/stats/mod.rs +++ b/meilisearch-http/tests/stats/mod.rs @@ -1,5 +1,6 @@ use serde_json::json; -use time::{format_description::well_known::Rfc3339, OffsetDateTime}; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; use crate::common::Server; diff --git a/meilisearch-http/tests/tasks/mod.rs b/meilisearch-http/tests/tasks/mod.rs index 785e0284e..548fa90be 100644 --- a/meilisearch-http/tests/tasks/mod.rs +++ b/meilisearch-http/tests/tasks/mod.rs @@ -1,8 +1,10 @@ -use crate::common::Server; +use meili_snap::insta::{self, assert_json_snapshot}; use serde_json::json; use time::format_description::well_known::Rfc3339; use time::OffsetDateTime; +use crate::common::Server; + #[actix_rt::test] async fn error_get_unexisting_task_status() { let server = Server::new().await; @@ -49,10 +51,7 @@ async fn list_tasks() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; let (response, code) = index.list_tasks().await; assert_eq!(code, 200); @@ -66,44 +65,39 @@ async fn list_tasks_with_star_filters() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; - let (response, code) = index.service.get("/tasks?indexUid=test").await; + let (response, code) = index.service.get("/tasks?indexUids=test").await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 2); - let (response, code) = index.service.get("/tasks?indexUid=*").await; + let (response, code) = index.service.get("/tasks?indexUids=*").await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 2); - let (response, code) = index.service.get("/tasks?indexUid=*,pasteque").await; + let (response, code) = index.service.get("/tasks?indexUids=*,pasteque").await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 2); - let (response, code) = index.service.get("/tasks?type=*").await; + let (response, code) = index.service.get("/tasks?types=*").await; assert_eq!(code, 200); assert_eq!(response["results"].as_array().unwrap().len(), 2); + let (response, code) = + index.service.get("/tasks?types=*,documentAdditionOrUpdate&statuses=*").await; + assert_eq!(code, 200, "{:?}", response); + assert_eq!(response["results"].as_array().unwrap().len(), 2); + let (response, code) = index .service - .get("/tasks?type=*,documentAdditionOrUpdate&status=*") + .get("/tasks?types=*,documentAdditionOrUpdate&statuses=*,failed&indexUids=test") 
.await; assert_eq!(code, 200, "{:?}", response); assert_eq!(response["results"].as_array().unwrap().len(), 2); let (response, code) = index .service - .get("/tasks?type=*,documentAdditionOrUpdate&status=*,failed&indexUid=test") - .await; - assert_eq!(code, 200, "{:?}", response); - assert_eq!(response["results"].as_array().unwrap().len(), 2); - - let (response, code) = index - .service - .get("/tasks?type=*,documentAdditionOrUpdate&status=*,failed&indexUid=test,*") + .get("/tasks?types=*,documentAdditionOrUpdate&statuses=*,failed&indexUids=test,*") .await; assert_eq!(code, 200, "{:?}", response); assert_eq!(response["results"].as_array().unwrap().len(), 2); @@ -116,10 +110,7 @@ async fn list_tasks_status_filtered() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; let (response, code) = index.filtered_tasks(&[], &["succeeded"]).await; @@ -145,19 +136,15 @@ async fn list_tasks_type_filtered() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; let (response, code) = index.filtered_tasks(&["indexCreation"], &[]).await; assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 1); - let (response, code) = index - .filtered_tasks(&["indexCreation", "documentAdditionOrUpdate"], &[]) - .await; + let (response, code) = + index.filtered_tasks(&["indexCreation", "documentAdditionOrUpdate"], &[]).await; assert_eq!(code, 200, "{}", response); assert_eq!(response["results"].as_array().unwrap().len(), 2); } @@ -169,10 +156,7 @@ async fn list_tasks_status_and_type_filtered() { index.create(None).await; index.wait_task(0).await; index - .add_documents( - serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), - None, - ) + .add_documents(serde_json::from_str(include_str!("../assets/test_set.json")).unwrap(), None) .await; let (response, code) = index.filtered_tasks(&["indexCreation"], &["failed"]).await; @@ -189,6 +173,131 @@ async fn list_tasks_status_and_type_filtered() { assert_eq!(response["results"].as_array().unwrap().len(), 2); } +#[actix_rt::test] +async fn get_task_filter_error() { + let server = Server::new().await; + + let (response, code) = server.tasks_filter(json!( { "lol": "pied" })).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Query deserialize error: unknown field `lol`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + let (response, code) = server.tasks_filter(json!( { "uids": "pied" })).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Task uid `pied` is invalid. 
It should only contain numeric characters.", + "code": "invalid_task_uids_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_task_uids_filter" + } + "###); + + let (response, code) = server.tasks_filter(json!( { "from": "pied" })).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Query deserialize error: invalid digit found in string", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + let (response, code) = server.tasks_filter(json!( { "beforeStartedAt": "pied" })).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Task `beforeStartedAt` `pied` is invalid. It should follow the YYYY-MM-DD or RFC 3339 date-time format.", + "code": "invalid_task_date_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_task_date_filter" + } + "###); +} + +#[actix_rt::test] +async fn delete_task_filter_error() { + let server = Server::new().await; + + let (response, code) = server.delete_tasks(json!(null)).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.", + "code": "missing_task_filters", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_task_filters" + } + "###); + + let (response, code) = server.delete_tasks(json!({ "lol": "pied" })).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Query deserialize error: unknown field `lol`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + let (response, code) = server.delete_tasks(json!({ "uids": "pied" })).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Task uid `pied` is invalid. It should only contain numeric characters.", + "code": "invalid_task_uids_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_task_uids_filter" + } + "###); +} + +#[actix_rt::test] +async fn cancel_task_filter_error() { + let server = Server::new().await; + + let (response, code) = server.cancel_tasks(json!(null)).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Query parameters to filter the tasks to cancel are missing. 
Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.", + "code": "missing_task_filters", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#missing_task_filters" + } + "###); + + let (response, code) = server.cancel_tasks(json!({ "lol": "pied" })).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Query deserialize error: unknown field `lol`", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + let (response, code) = server.cancel_tasks(json!({ "uids": "pied" })).await; + assert_eq!(code, 400, "{}", response); + insta::assert_json_snapshot!(response, @r###" + { + "message": "Task uid `pied` is invalid. It should only contain numeric characters.", + "code": "invalid_task_uids_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_task_uids_filter" + } + "###); +} + macro_rules! assert_valid_summarized_task { ($response:expr, $task_type:literal, $index:literal) => {{ assert_eq!($response.as_object().unwrap().len(), 5); @@ -231,3 +340,665 @@ async fn test_summarized_task_view() { let (response, _) = index.delete().await; assert_valid_summarized_task!(response, "indexDeletion", "test"); } + +#[actix_web::test] +async fn test_summarized_document_addition_or_update() { + let server = Server::new().await; + let index = server.index("test"); + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), None).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_delete_batch() { + let server = Server::new().await; + let index = server.index("test"); + index.delete_batch(vec![1, 2, 3]).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 3, + "deletedDocuments": 0 + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": 
"invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.create(None).await; + index.delete_batch(vec![42]).await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 1, + "deletedDocuments": 0 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_delete_document() { + let server = Server::new().await; + let index = server.index("test"); + index.delete_document(1).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 1, + "deletedDocuments": 0 + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.create(None).await; + index.delete_document(42).await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "succeeded", + "type": "documentDeletion", + "canceledBy": null, + "details": { + "providedIds": 1, + "deletedDocuments": 0 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_settings_update() { + let server = Server::new().await; + let index = server.index("test"); + // here we should find my payload even in the failed task. + index.update_settings(json!({ "rankingRules": ["custom"] })).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + dbg!(&task); + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "rankingRules": [ + "custom" + ] + }, + "error": { + "message": "`custom` ranking rule is invalid. 
Valid ranking rules are words, typo, sort, proximity, attribute, exactness and custom ranking rules.", + "code": "invalid_ranking_rule", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_ranking_rule" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.update_settings(json!({ "displayedAttributes": ["doggos", "name"], "filterableAttributes": ["age", "nb_paw_pads"], "sortableAttributes": ["iq"] })).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "displayedAttributes": [ + "doggos", + "name" + ], + "filterableAttributes": [ + "age", + "nb_paw_pads" + ], + "sortableAttributes": [ + "iq" + ] + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_index_creation() { + let server = Server::new().await; + let index = server.index("test"); + index.create(None).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "succeeded", + "type": "indexCreation", + "canceledBy": null, + "details": { + "primaryKey": null + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.create(Some("doggos")).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "failed", + "type": "indexCreation", + "canceledBy": null, + "details": { + "primaryKey": "doggos" + }, + "error": { + "message": "Index `test` already exists.", + "code": "index_already_exists", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_already_exists" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_index_deletion() { + let server = Server::new().await; + let index = server.index("test"); + index.delete().await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "indexDeletion", + "canceledBy": null, + "details": { + "deletedDocuments": 0 + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // is the details correctly set when documents are actually deleted. 
+ index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; + index.delete().await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "succeeded", + "type": "indexDeletion", + "canceledBy": null, + "details": { + "deletedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // What happens when you delete an index that doesn't exist. + index.delete().await; + index.wait_task(2).await; + let (task, _) = index.get_task(2).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 2, + "indexUid": "test", + "status": "succeeded", + "type": "indexDeletion", + "canceledBy": null, + "details": { + "deletedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_index_update() { + let server = Server::new().await; + let index = server.index("test"); + // If the index doesn't exist yet, we should get errors with or without the primary key. + index.update(None).await; + index.wait_task(0).await; + let (task, _) = index.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "failed", + "type": "indexUpdate", + "canceledBy": null, + "details": { + "primaryKey": null + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.update(Some("bones")).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "failed", + "type": "indexUpdate", + "canceledBy": null, + "details": { + "primaryKey": "bones" + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // And run the same two tests once the index does exist.
+ index.create(None).await; + + index.update(None).await; + index.wait_task(3).await; + let (task, _) = index.get_task(3).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 3, + "indexUid": "test", + "status": "succeeded", + "type": "indexUpdate", + "canceledBy": null, + "details": { + "primaryKey": null + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + index.update(Some("bones")).await; + index.wait_task(4).await; + let (task, _) = index.get_task(4).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 4, + "indexUid": "test", + "status": "succeeded", + "type": "indexUpdate", + "canceledBy": null, + "details": { + "primaryKey": "bones" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_index_swap() { + let server = Server::new().await; + server + .index_swap(json!([ + { "indexes": ["doggos", "cattos"] } + ])) + .await; + server.wait_task(0).await; + let (task, _) = server.get_task(0).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": null, + "status": "failed", + "type": "indexSwap", + "canceledBy": null, + "details": { + "swaps": [ + { + "indexes": [ + "doggos", + "cattos" + ] + } + ] + }, + "error": { + "message": "Indexes `cattos`, `doggos` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + server.index("doggos").create(None).await; + server.index("cattos").create(None).await; + server + .index_swap(json!([ + { "indexes": ["doggos", "cattos"] } + ])) + .await; + server.wait_task(3).await; + let (task, _) = server.get_task(3).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 3, + "indexUid": null, + "status": "succeeded", + "type": "indexSwap", + "canceledBy": null, + "details": { + "swaps": [ + { + "indexes": [ + "doggos", + "cattos" + ] + } + ] + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_task_cancelation() { + let server = Server::new().await; + let index = server.index("doggos"); + // to avoid being flaky we're only going to cancel an already finished task :( + index.create(None).await; + index.wait_task(0).await; + server.cancel_tasks(json!({ "uids": [0] })).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": null, + "status": "succeeded", + "type": "taskCancelation", + "canceledBy": null, + "details": { + "matchedTasks": 1, + "canceledTasks": 0, + "originalFilter": "?uids=0" + }, + "error": null, + 
"duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_task_deletion() { + let server = Server::new().await; + let index = server.index("doggos"); + // to avoid being flaky we're only going to delete an already finished task :( + index.create(None).await; + index.wait_task(0).await; + server.delete_tasks(json!({ "uids": [0] })).await; + index.wait_task(1).await; + let (task, _) = index.get_task(1).await; + assert_json_snapshot!(task, + { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 1, + "indexUid": null, + "status": "succeeded", + "type": "taskDeletion", + "canceledBy": null, + "details": { + "matchedTasks": 1, + "deletedTasks": 1, + "originalFilter": "?uids=0" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_web::test] +async fn test_summarized_dump_creation() { + let server = Server::new().await; + server.create_dump().await; + server.wait_task(0).await; + let (task, _) = server.get_task(0).await; + assert_json_snapshot!(task, + { ".details.dumpUid" => "[dumpUid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + @r###" + { + "uid": 0, + "indexUid": null, + "status": "succeeded", + "type": "dumpCreation", + "canceledBy": null, + "details": { + "dumpUid": "[dumpUid]" + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} diff --git a/meilisearch-lib/proptest-regressions/index_resolver/mod.txt b/meilisearch-lib/proptest-regressions/index_resolver/mod.txt deleted file mode 100644 index 553b8f1d5..000000000 --- a/meilisearch-lib/proptest-regressions/index_resolver/mod.txt +++ /dev/null @@ -1,19 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. 
-cc 6f3ae3cba934ba3e328e2306218c32f27a46ce2d54a1258b05fef65663208662 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: DocumentAddition { content_uuid: 37bc137d-2038-47f0-819f-b133233daadc, merge_strategy: ReplaceDocuments, primary_key: None, documents_count: 0 }, events: [] } -cc b726f7d9f44a9216aad302ddba0f04e7108817e741d656a4759aea8562de4d63 # shrinks to task = Task { id: 0, index_uid: IndexUid("_"), content: IndexDeletion, events: [] }, index_exists = false, index_op_fails = false, any_int = 0 -cc 427ec2dde3260b1ab334207bdc22adef28a5b8532b9902c84b55fd2c017ea7e1 # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = false, any_int = 0 -cc c24f3d42f0f36fbdbf4e9d4327e75529b163ac580d63a5934ca05e9b5bd23a65 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: IndexDeletion, events: [] }, index_exists = true, index_op_fails = true, any_int = 0 -cc 8084e2410801b997533b0bcbad75cd212873cfc2677f26847f68c568ead1604c # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: false }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0 -cc 330085e0200a9a2ddfdd764a03d768aa95c431bcaafbd530c8c949425beed18b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: None }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0 -cc c70e901576ef2fb9622e814bdecd11e4747cd70d71a9a6ce771b5b7256a187c0 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: SettingsUpdate { settings: Settings { displayed_attributes: NotSet, searchable_attributes: NotSet, filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, synonyms: NotSet, distinct_attribute: NotSet, _kind: PhantomData }, is_deletion: true }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0 -cc 3fe2c38cbc2cca34ecde321472141d386056f0cd332cbf700773657715a382b5 # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: None }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0 -cc c31cf86692968483f1ab08a6a9d4667ccb9635c306998551bf1eb1f135ef0d4b # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: UpdateIndex { primary_key: Some("") }, events: [] }, index_exists = true, index_op_fails = false, any_int = 0 -cc 3a01c78db082434b8a4f8914abf0d1059d39f4426d16df20d72e1bd7ebb94a6a # shrinks to task = Task { id: 0, index_uid: IndexUid("0"), content: UpdateIndex { primary_key: None }, events: [] }, index_exists = true, index_op_fails = true, any_int = 0 -cc c450806df3921d1e6fe9b6af93d999e8196d0175b69b64f1810802582421e94a # shrinks to task = Task { id: 0, index_uid: IndexUid("a"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = false, any_int = 0 -cc fb6b98947cbdbdee05ed3c0bf2923aad2c311edc276253642eb43a0c0ec4888a # shrinks to task = Task { id: 0, index_uid: IndexUid("A"), content: CreateIndex { primary_key: Some("") }, events: [] }, index_exists = false, index_op_fails = true, any_int = 0 -cc 1aa59d8e22484e9915efbb5818e1e1ab684aa61b166dc82130d6221663ba00bf # shrinks to task = Task { id: 0, 
index_uid: IndexUid("a"), content: DocumentDeletion(Clear), events: [] }, index_exists = true, index_op_fails = false, any_int = 0 diff --git a/meilisearch-lib/proptest-regressions/tasks/task_store/store.txt b/meilisearch-lib/proptest-regressions/tasks/task_store/store.txt deleted file mode 100644 index a857bfbe4..000000000 --- a/meilisearch-lib/proptest-regressions/tasks/task_store/store.txt +++ /dev/null @@ -1,7 +0,0 @@ -# Seeds for failure cases proptest has generated in the past. It is -# automatically read and these particular cases re-run before any -# novel cases are generated. -# -# It is recommended to check this file in to source control so that -# everyone who runs the test benefits from these saved cases. -cc 8cbd6c45ce8c5611ec3f2f94fd485f6a8eeccc470fa426e59bdfd4d9e7fce0e1 # shrinks to bytes = [] diff --git a/meilisearch-lib/src/analytics.rs b/meilisearch-lib/src/analytics.rs deleted file mode 100644 index adfddf998..000000000 --- a/meilisearch-lib/src/analytics.rs +++ /dev/null @@ -1,8 +0,0 @@ -use std::{fs, path::Path}; - -/// Copy the `instance-uid` contained in one db to another. Ignore all errors. -pub fn copy_user_id(src: &Path, dst: &Path) { - if let Ok(user_id) = fs::read_to_string(src.join("instance-uid")) { - let _ = fs::write(dst.join("instance-uid"), &user_id); - } -} diff --git a/meilisearch-lib/src/dump/compat/mod.rs b/meilisearch-lib/src/dump/compat/mod.rs deleted file mode 100644 index 9abac24c7..000000000 --- a/meilisearch-lib/src/dump/compat/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -pub mod v2; -pub mod v3; -pub mod v4; - -/// Parses the v1 version of the Asc ranking rules `asc(price)`and returns the field name. -pub fn asc_ranking_rule(text: &str) -> Option<&str> { - text.split_once("asc(") - .and_then(|(_, tail)| tail.rsplit_once(')')) - .map(|(field, _)| field) -} - -/// Parses the v1 version of the Desc ranking rules `desc(price)`and returns the field name. 
-pub fn desc_ranking_rule(text: &str) -> Option<&str> { - text.split_once("desc(") - .and_then(|(_, tail)| tail.rsplit_once(')')) - .map(|(field, _)| field) -} diff --git a/meilisearch-lib/src/dump/compat/v2.rs b/meilisearch-lib/src/dump/compat/v2.rs deleted file mode 100644 index 364d894c4..000000000 --- a/meilisearch-lib/src/dump/compat/v2.rs +++ /dev/null @@ -1,152 +0,0 @@ -use anyhow::bail; -use meilisearch_types::error::Code; -use milli::update::IndexDocumentsMethod; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use uuid::Uuid; - -use crate::index::{Settings, Unchecked}; - -#[derive(Serialize, Deserialize)] -pub struct UpdateEntry { - pub uuid: Uuid, - pub update: UpdateStatus, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum UpdateFormat { - Json, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct DocumentAdditionResult { - pub nb_documents: usize, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum UpdateResult { - DocumentsAddition(DocumentAdditionResult), - DocumentDeletion { deleted: u64 }, - Other, -} - -#[allow(clippy::large_enum_variant)] -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type")] -pub enum UpdateMeta { - DocumentsAddition { - method: IndexDocumentsMethod, - format: UpdateFormat, - primary_key: Option, - }, - ClearDocuments, - DeleteDocuments { - ids: Vec, - }, - Settings(Settings), -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Enqueued { - pub update_id: u64, - pub meta: UpdateMeta, - #[serde(with = "time::serde::rfc3339")] - pub enqueued_at: OffsetDateTime, - pub content: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Processed { - pub success: UpdateResult, - #[serde(with = "time::serde::rfc3339")] - pub processed_at: OffsetDateTime, - #[serde(flatten)] - pub from: Processing, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Processing { - #[serde(flatten)] - pub from: Enqueued, - #[serde(with = "time::serde::rfc3339")] - pub started_processing_at: OffsetDateTime, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Aborted { - #[serde(flatten)] - pub from: Enqueued, - #[serde(with = "time::serde::rfc3339")] - pub aborted_at: OffsetDateTime, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Failed { - #[serde(flatten)] - pub from: Processing, - pub error: ResponseError, - #[serde(with = "time::serde::rfc3339")] - pub failed_at: OffsetDateTime, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(tag = "status", rename_all = "camelCase")] -pub enum UpdateStatus { - Processing(Processing), - Enqueued(Enqueued), - Processed(Processed), - Aborted(Aborted), - Failed(Failed), -} - -type StatusCode = (); - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct ResponseError { - #[serde(skip)] - pub code: StatusCode, - pub message: String, - pub error_code: String, - pub error_type: String, - pub error_link: String, -} - -pub fn error_code_from_str(s: &str) -> anyhow::Result { - let code = match s { - "index_creation_failed" => Code::CreateIndex, - "index_already_exists" => Code::IndexAlreadyExists, - "index_not_found" => Code::IndexNotFound, - "invalid_index_uid" => Code::InvalidIndexUid, - "invalid_state" => Code::InvalidState, - "missing_primary_key" => Code::MissingPrimaryKey, - 
"primary_key_already_present" => Code::PrimaryKeyAlreadyPresent, - "invalid_request" => Code::InvalidRankingRule, - "max_fields_limit_exceeded" => Code::MaxFieldsLimitExceeded, - "missing_document_id" => Code::MissingDocumentId, - "invalid_facet" => Code::Filter, - "invalid_filter" => Code::Filter, - "invalid_sort" => Code::Sort, - "bad_parameter" => Code::BadParameter, - "bad_request" => Code::BadRequest, - "document_not_found" => Code::DocumentNotFound, - "internal" => Code::Internal, - "invalid_geo_field" => Code::InvalidGeoField, - "invalid_token" => Code::InvalidToken, - "missing_authorization_header" => Code::MissingAuthorizationHeader, - "payload_too_large" => Code::PayloadTooLarge, - "unretrievable_document" => Code::RetrieveDocument, - "search_error" => Code::SearchDocuments, - "unsupported_media_type" => Code::UnsupportedMediaType, - "dump_already_in_progress" => Code::DumpAlreadyInProgress, - "dump_process_failed" => Code::DumpProcessFailed, - _ => bail!("unknow error code."), - }; - - Ok(code) -} diff --git a/meilisearch-lib/src/dump/compat/v3.rs b/meilisearch-lib/src/dump/compat/v3.rs deleted file mode 100644 index 61e31eccd..000000000 --- a/meilisearch-lib/src/dump/compat/v3.rs +++ /dev/null @@ -1,205 +0,0 @@ -use meilisearch_types::error::{Code, ResponseError}; -use meilisearch_types::index_uid::IndexUid; -use milli::update::IndexDocumentsMethod; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use uuid::Uuid; - -use super::v4::{Task, TaskContent, TaskEvent}; -use crate::index::{Settings, Unchecked}; -use crate::tasks::task::{DocumentDeletion, TaskId, TaskResult}; - -use super::v2; - -#[derive(Serialize, Deserialize)] -pub struct DumpEntry { - pub uuid: Uuid, - pub uid: String, -} - -#[derive(Serialize, Deserialize)] -pub struct UpdateEntry { - pub uuid: Uuid, - pub update: UpdateStatus, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(tag = "status", rename_all = "camelCase")] -pub enum UpdateStatus { - Processing(Processing), - Enqueued(Enqueued), - Processed(Processed), - Failed(Failed), -} - -impl From for TaskResult { - fn from(other: v2::UpdateResult) -> Self { - match other { - v2::UpdateResult::DocumentsAddition(result) => TaskResult::DocumentAddition { - indexed_documents: result.nb_documents as u64, - }, - v2::UpdateResult::DocumentDeletion { deleted } => TaskResult::DocumentDeletion { - deleted_documents: deleted, - }, - v2::UpdateResult::Other => TaskResult::Other, - } - } -} - -#[allow(clippy::large_enum_variant)] -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum Update { - DeleteDocuments(Vec), - DocumentAddition { - primary_key: Option, - method: IndexDocumentsMethod, - content_uuid: Uuid, - }, - Settings(Settings), - ClearDocuments, -} - -impl From for super::v4::TaskContent { - fn from(update: Update) -> Self { - match update { - Update::DeleteDocuments(ids) => { - TaskContent::DocumentDeletion(DocumentDeletion::Ids(ids)) - } - Update::DocumentAddition { - primary_key, - method, - .. 
- } => TaskContent::DocumentAddition { - content_uuid: Uuid::default(), - merge_strategy: method, - primary_key, - // document count is unknown for legacy updates - documents_count: 0, - allow_index_creation: true, - }, - Update::Settings(settings) => TaskContent::SettingsUpdate { - settings, - // There is no way to know now, so we assume it isn't - is_deletion: false, - allow_index_creation: true, - }, - Update::ClearDocuments => TaskContent::DocumentDeletion(DocumentDeletion::Clear), - } - } -} - -#[allow(clippy::large_enum_variant)] -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(tag = "type")] -pub enum UpdateMeta { - DocumentsAddition { - method: IndexDocumentsMethod, - primary_key: Option, - }, - ClearDocuments, - DeleteDocuments { - ids: Vec, - }, - Settings(Settings), -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Enqueued { - pub update_id: u64, - pub meta: Update, - #[serde(with = "time::serde::rfc3339")] - pub enqueued_at: OffsetDateTime, -} - -impl Enqueued { - fn update_task(self, task: &mut Task) { - // we do not erase the `TaskId` that was given to us. - task.content = self.meta.into(); - task.events.push(TaskEvent::Created(self.enqueued_at)); - } -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Processed { - pub success: v2::UpdateResult, - #[serde(with = "time::serde::rfc3339")] - pub processed_at: OffsetDateTime, - #[serde(flatten)] - pub from: Processing, -} - -impl Processed { - fn update_task(self, task: &mut Task) { - self.from.update_task(task); - - let event = TaskEvent::Succeded { - result: TaskResult::from(self.success), - timestamp: self.processed_at, - }; - task.events.push(event); - } -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct Processing { - #[serde(flatten)] - pub from: Enqueued, - #[serde(with = "time::serde::rfc3339")] - pub started_processing_at: OffsetDateTime, -} - -impl Processing { - fn update_task(self, task: &mut Task) { - self.from.update_task(task); - - let event = TaskEvent::Processing(self.started_processing_at); - task.events.push(event); - } -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct Failed { - #[serde(flatten)] - pub from: Processing, - pub msg: String, - pub code: Code, - #[serde(with = "time::serde::rfc3339")] - pub failed_at: OffsetDateTime, -} - -impl Failed { - fn update_task(self, task: &mut Task) { - self.from.update_task(task); - - let event = TaskEvent::Failed { - error: ResponseError::from_msg(self.msg, self.code), - timestamp: self.failed_at, - }; - task.events.push(event); - } -} - -impl From<(UpdateStatus, String, TaskId)> for Task { - fn from((update, uid, task_id): (UpdateStatus, String, TaskId)) -> Self { - // Dummy task - let mut task = super::v4::Task { - id: task_id, - index_uid: IndexUid::new_unchecked(uid), - content: super::v4::TaskContent::IndexDeletion, - events: Vec::new(), - }; - - match update { - UpdateStatus::Processing(u) => u.update_task(&mut task), - UpdateStatus::Enqueued(u) => u.update_task(&mut task), - UpdateStatus::Processed(u) => u.update_task(&mut task), - UpdateStatus::Failed(u) => u.update_task(&mut task), - } - - task - } -} diff --git a/meilisearch-lib/src/dump/compat/v4.rs b/meilisearch-lib/src/dump/compat/v4.rs deleted file mode 100644 index c412e7f17..000000000 --- a/meilisearch-lib/src/dump/compat/v4.rs +++ /dev/null @@ -1,145 +0,0 @@ -use 
meilisearch_types::error::ResponseError; -use meilisearch_types::index_uid::IndexUid; -use milli::update::IndexDocumentsMethod; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use uuid::Uuid; - -use crate::index::{Settings, Unchecked}; -use crate::tasks::batch::BatchId; -use crate::tasks::task::{ - DocumentDeletion, TaskContent as NewTaskContent, TaskEvent as NewTaskEvent, TaskId, TaskResult, -}; - -#[derive(Debug, Serialize, Deserialize)] -pub struct Task { - pub id: TaskId, - pub index_uid: IndexUid, - pub content: TaskContent, - pub events: Vec, -} - -impl From for crate::tasks::task::Task { - fn from(other: Task) -> Self { - Self { - id: other.id, - content: NewTaskContent::from((other.index_uid, other.content)), - events: other.events.into_iter().map(Into::into).collect(), - } - } -} - -#[derive(Debug, Serialize, Deserialize)] -pub enum TaskEvent { - Created(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), - Batched { - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - batch_id: BatchId, - }, - Processing(#[serde(with = "time::serde::rfc3339")] OffsetDateTime), - Succeded { - result: TaskResult, - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - }, - Failed { - error: ResponseError, - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - }, -} - -impl From for NewTaskEvent { - fn from(other: TaskEvent) -> Self { - match other { - TaskEvent::Created(x) => NewTaskEvent::Created(x), - TaskEvent::Batched { - timestamp, - batch_id, - } => NewTaskEvent::Batched { - timestamp, - batch_id, - }, - TaskEvent::Processing(x) => NewTaskEvent::Processing(x), - TaskEvent::Succeded { result, timestamp } => { - NewTaskEvent::Succeeded { result, timestamp } - } - TaskEvent::Failed { error, timestamp } => NewTaskEvent::Failed { error, timestamp }, - } - } -} - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] -#[allow(clippy::large_enum_variant)] -pub enum TaskContent { - DocumentAddition { - content_uuid: Uuid, - merge_strategy: IndexDocumentsMethod, - primary_key: Option, - documents_count: usize, - allow_index_creation: bool, - }, - DocumentDeletion(DocumentDeletion), - SettingsUpdate { - settings: Settings, - /// Indicates whether the task was a deletion - is_deletion: bool, - allow_index_creation: bool, - }, - IndexDeletion, - IndexCreation { - primary_key: Option, - }, - IndexUpdate { - primary_key: Option, - }, - Dump { - uid: String, - }, -} - -impl From<(IndexUid, TaskContent)> for NewTaskContent { - fn from((index_uid, content): (IndexUid, TaskContent)) -> Self { - match content { - TaskContent::DocumentAddition { - content_uuid, - merge_strategy, - primary_key, - documents_count, - allow_index_creation, - } => NewTaskContent::DocumentAddition { - index_uid, - content_uuid, - merge_strategy, - primary_key, - documents_count, - allow_index_creation, - }, - TaskContent::DocumentDeletion(deletion) => NewTaskContent::DocumentDeletion { - index_uid, - deletion, - }, - TaskContent::SettingsUpdate { - settings, - is_deletion, - allow_index_creation, - } => NewTaskContent::SettingsUpdate { - index_uid, - settings, - is_deletion, - allow_index_creation, - }, - TaskContent::IndexDeletion => NewTaskContent::IndexDeletion { index_uid }, - TaskContent::IndexCreation { primary_key } => NewTaskContent::IndexCreation { - index_uid, - primary_key, - }, - TaskContent::IndexUpdate { primary_key } => NewTaskContent::IndexUpdate { - index_uid, - primary_key, - }, - TaskContent::Dump { uid } => NewTaskContent::Dump { uid 
}, - } - } -} diff --git a/meilisearch-lib/src/dump/handler.rs b/meilisearch-lib/src/dump/handler.rs deleted file mode 100644 index 069196451..000000000 --- a/meilisearch-lib/src/dump/handler.rs +++ /dev/null @@ -1,188 +0,0 @@ -#[cfg(not(test))] -pub use real::DumpHandler; - -#[cfg(test)] -pub use test::MockDumpHandler as DumpHandler; - -use time::{macros::format_description, OffsetDateTime}; - -/// Generate uid from creation date -pub fn generate_uid() -> String { - OffsetDateTime::now_utc() - .format(format_description!( - "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]" - )) - .unwrap() -} - -mod real { - use std::path::PathBuf; - use std::sync::Arc; - - use log::{info, trace}; - use meilisearch_auth::AuthController; - use milli::heed::Env; - use tokio::fs::create_dir_all; - use tokio::io::AsyncWriteExt; - - use crate::analytics; - use crate::compression::to_tar_gz; - use crate::dump::error::{DumpError, Result}; - use crate::dump::{MetadataVersion, META_FILE_NAME}; - use crate::index_resolver::{ - index_store::IndexStore, meta_store::IndexMetaStore, IndexResolver, - }; - use crate::tasks::TaskStore; - use crate::update_file_store::UpdateFileStore; - - pub struct DumpHandler { - dump_path: PathBuf, - db_path: PathBuf, - update_file_store: UpdateFileStore, - task_store_size: usize, - index_db_size: usize, - env: Arc, - index_resolver: Arc>, - } - - impl DumpHandler - where - U: IndexMetaStore + Sync + Send + 'static, - I: IndexStore + Sync + Send + 'static, - { - pub fn new( - dump_path: PathBuf, - db_path: PathBuf, - update_file_store: UpdateFileStore, - task_store_size: usize, - index_db_size: usize, - env: Arc, - index_resolver: Arc>, - ) -> Self { - Self { - dump_path, - db_path, - update_file_store, - task_store_size, - index_db_size, - env, - index_resolver, - } - } - - pub async fn run(&self, uid: String) -> Result<()> { - trace!("Performing dump."); - - create_dir_all(&self.dump_path).await?; - - let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; - let temp_dump_path = temp_dump_dir.path().to_owned(); - - let meta = MetadataVersion::new_v5(self.index_db_size, self.task_store_size); - let meta_path = temp_dump_path.join(META_FILE_NAME); - - let meta_bytes = serde_json::to_vec(&meta)?; - let mut meta_file = tokio::fs::File::create(&meta_path).await?; - meta_file.write_all(&meta_bytes).await?; - - analytics::copy_user_id(&self.db_path, &temp_dump_path); - - create_dir_all(&temp_dump_path.join("indexes")).await?; - - let db_path = self.db_path.clone(); - let temp_dump_path_clone = temp_dump_path.clone(); - tokio::task::spawn_blocking(move || -> Result<()> { - AuthController::dump(db_path, temp_dump_path_clone)?; - Ok(()) - }) - .await??; - TaskStore::dump( - self.env.clone(), - &temp_dump_path, - self.update_file_store.clone(), - ) - .await?; - self.index_resolver.dump(&temp_dump_path).await?; - - let dump_path = self.dump_path.clone(); - let dump_path = tokio::task::spawn_blocking(move || -> Result { - // for now we simply copy the updates/updates_files - // FIXME: We may copy more files than necessary, if new files are added while we are - // performing the dump. We need a way to filter them out. 
- - let temp_dump_file = tempfile::NamedTempFile::new_in(&dump_path)?; - to_tar_gz(temp_dump_path, temp_dump_file.path()) - .map_err(|e| DumpError::Internal(e.into()))?; - - let dump_path = dump_path.join(uid).with_extension("dump"); - temp_dump_file.persist(&dump_path)?; - - Ok(dump_path) - }) - .await??; - - info!("Created dump in {:?}.", dump_path); - - Ok(()) - } - } -} - -#[cfg(test)] -mod test { - use std::path::PathBuf; - use std::sync::Arc; - - use milli::heed::Env; - use nelson::Mocker; - - use crate::dump::error::Result; - use crate::index_resolver::IndexResolver; - use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore}; - use crate::update_file_store::UpdateFileStore; - - use super::*; - - pub enum MockDumpHandler { - Real(super::real::DumpHandler), - Mock(Mocker), - } - - impl MockDumpHandler { - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(mocker) - } - } - - impl MockDumpHandler - where - U: IndexMetaStore + Sync + Send + 'static, - I: IndexStore + Sync + Send + 'static, - { - pub fn new( - dump_path: PathBuf, - db_path: PathBuf, - update_file_store: UpdateFileStore, - task_store_size: usize, - index_db_size: usize, - env: Arc, - index_resolver: Arc>, - ) -> Self { - Self::Real(super::real::DumpHandler::new( - dump_path, - db_path, - update_file_store, - task_store_size, - index_db_size, - env, - index_resolver, - )) - } - pub async fn run(&self, uid: String) -> Result<()> { - match self { - DumpHandler::Real(real) => real.run(uid).await, - DumpHandler::Mock(mocker) => unsafe { mocker.get("run").call(uid) }, - } - } - } -} diff --git a/meilisearch-lib/src/dump/loaders/mod.rs b/meilisearch-lib/src/dump/loaders/mod.rs deleted file mode 100644 index 199b20c02..000000000 --- a/meilisearch-lib/src/dump/loaders/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub mod v2; -pub mod v3; -pub mod v4; -pub mod v5; diff --git a/meilisearch-lib/src/dump/loaders/v1.rs b/meilisearch-lib/src/dump/loaders/v1.rs deleted file mode 100644 index a07475b56..000000000 --- a/meilisearch-lib/src/dump/loaders/v1.rs +++ /dev/null @@ -1,24 +0,0 @@ -use std::path::Path; - -use serde::{Deserialize, Serialize}; - -use crate::index_controller::IndexMetadata; - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct MetadataV1 { - pub db_version: String, - indexes: Vec, -} - -impl MetadataV1 { - #[allow(dead_code, unreachable_code, unused_variables)] - pub fn load_dump( - self, - src: impl AsRef, - dst: impl AsRef, - size: usize, - indexer_options: &IndexerOpts, - ) -> anyhow::Result<()> { - anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") -} diff --git a/meilisearch-lib/src/dump/loaders/v2.rs b/meilisearch-lib/src/dump/loaders/v2.rs deleted file mode 100644 index 5926de931..000000000 --- a/meilisearch-lib/src/dump/loaders/v2.rs +++ /dev/null @@ -1,216 +0,0 @@ -use std::fs::{File, OpenOptions}; -use std::io::Write; -use std::path::{Path, PathBuf}; - -use serde_json::{Deserializer, Value}; -use tempfile::NamedTempFile; - -use crate::dump::compat::{self, v2, v3}; -use crate::dump::Metadata; -use crate::options::IndexerOpts; - -/// The dump v2 reads the dump folder and patches all the needed file to make it compatible with a -/// dump v3, then calls the dump v3 to actually handle the dump. 
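Taken together, the removed loaders form a chain: each one patches the extracted dump one version forward on disk and then hands it to the next loader, so every supported dump ultimately goes through the v5 loader. A rough outline of that chain, as descriptive comments rather than the literal code:

// v2::load_dump: rename index folders, patch settings and updates, then call v3::load_dump
// v3::load_dump: patch index metadata and updates, then call v4::load_dump
// v4::load_dump: patch updates and keys, then call v5::load_dump
// v5::load_dump: open the target environment and load indexes, tasks, keys and the instance-uid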
-pub fn load_dump( - meta: Metadata, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - update_db_size: usize, - indexing_options: &IndexerOpts, -) -> anyhow::Result<()> { - log::info!("Patching dump V2 to dump V3..."); - let indexes_path = src.as_ref().join("indexes"); - - let dir_entries = std::fs::read_dir(indexes_path)?; - for entry in dir_entries { - let entry = entry?; - - // rename the index folder - let path = entry.path(); - let new_path = patch_index_uuid_path(&path).expect("invalid index folder."); - - std::fs::rename(path, &new_path)?; - - let settings_path = new_path.join("meta.json"); - - patch_settings(settings_path)?; - } - - let update_dir = src.as_ref().join("updates"); - let update_path = update_dir.join("data.jsonl"); - patch_updates(update_dir, update_path)?; - - super::v3::load_dump( - meta, - src, - dst, - index_db_size, - update_db_size, - indexing_options, - ) -} - -fn patch_index_uuid_path(path: &Path) -> Option { - let uuid = path.file_name()?.to_str()?.trim_start_matches("index-"); - let new_path = path.parent()?.join(uuid); - Some(new_path) -} - -fn patch_settings(path: impl AsRef) -> anyhow::Result<()> { - let mut meta_file = File::open(&path)?; - let mut meta: Value = serde_json::from_reader(&mut meta_file)?; - - // We first deserialize the dump meta into a serde_json::Value and change - // the custom ranking rules settings from the old format to the new format. - if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") { - patch_custom_ranking_rules(ranking_rules); - } - - let mut meta_file = OpenOptions::new().truncate(true).write(true).open(path)?; - - serde_json::to_writer(&mut meta_file, &meta)?; - - Ok(()) -} - -fn patch_updates(dir: impl AsRef, path: impl AsRef) -> anyhow::Result<()> { - let mut output_update_file = NamedTempFile::new_in(&dir)?; - let update_file = File::open(&path)?; - - let stream = Deserializer::from_reader(update_file).into_iter::(); - - for update in stream { - let update_entry = update?; - - let update_entry = v3::UpdateEntry::from(update_entry); - - serde_json::to_writer(&mut output_update_file, &update_entry)?; - output_update_file.write_all(b"\n")?; - } - - output_update_file.flush()?; - output_update_file.persist(path)?; - - Ok(()) -} - -/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`. -/// -/// This is done for compatibility reasons, and to avoid a new dump version, -/// since the new syntax was introduced soon after the new dump version. 
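To make the conversion described in the comment above concrete, here is a small illustrative snippet (an editorial example, not part of the patch), assuming a v2 dump stored its custom ranking rules in the old function-style syntax:

use serde_json::json;

fn main() {
    // Custom rules as they appear in a v2 dump's settings:
    let old_rules = json!(["words", "typo", "asc(price)", "desc(rank)"]);
    // What patch_custom_ranking_rules rewrites them to before the dump
    // is handed to the v3 loader (built-in rules pass through unchanged):
    let new_rules = json!(["words", "typo", "price:asc", "rank:desc"]);
    println!("{old_rules} -> {new_rules}");
}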
-fn patch_custom_ranking_rules(ranking_rules: &mut Value) { - *ranking_rules = match ranking_rules.take() { - Value::Array(values) => values - .into_iter() - .filter_map(|value| match value { - Value::String(s) if s.starts_with("asc") => compat::asc_ranking_rule(&s) - .map(|f| format!("{}:asc", f)) - .map(Value::String), - Value::String(s) if s.starts_with("desc") => compat::desc_ranking_rule(&s) - .map(|f| format!("{}:desc", f)) - .map(Value::String), - otherwise => Some(otherwise), - }) - .collect(), - otherwise => otherwise, - } -} - -impl From for v3::UpdateEntry { - fn from(v2::UpdateEntry { uuid, update }: v2::UpdateEntry) -> Self { - let update = match update { - v2::UpdateStatus::Processing(meta) => v3::UpdateStatus::Processing(meta.into()), - v2::UpdateStatus::Enqueued(meta) => v3::UpdateStatus::Enqueued(meta.into()), - v2::UpdateStatus::Processed(meta) => v3::UpdateStatus::Processed(meta.into()), - v2::UpdateStatus::Aborted(_) => unreachable!("Updates could never be aborted."), - v2::UpdateStatus::Failed(meta) => v3::UpdateStatus::Failed(meta.into()), - }; - - Self { uuid, update } - } -} - -impl From for v3::Failed { - fn from(other: v2::Failed) -> Self { - let v2::Failed { - from, - error, - failed_at, - } = other; - - Self { - from: from.into(), - msg: error.message, - code: v2::error_code_from_str(&error.error_code) - .expect("Invalid update: Invalid error code"), - failed_at, - } - } -} - -impl From for v3::Processing { - fn from(other: v2::Processing) -> Self { - let v2::Processing { - from, - started_processing_at, - } = other; - - Self { - from: from.into(), - started_processing_at, - } - } -} - -impl From for v3::Enqueued { - fn from(other: v2::Enqueued) -> Self { - let v2::Enqueued { - update_id, - meta, - enqueued_at, - content, - } = other; - - let meta = match meta { - v2::UpdateMeta::DocumentsAddition { - method, - primary_key, - .. - } => { - v3::Update::DocumentAddition { - primary_key, - method, - // Just ignore if the uuid is no present. If it is needed later, an error will - // be thrown. - content_uuid: content.unwrap_or_default(), - } - } - v2::UpdateMeta::ClearDocuments => v3::Update::ClearDocuments, - v2::UpdateMeta::DeleteDocuments { ids } => v3::Update::DeleteDocuments(ids), - v2::UpdateMeta::Settings(settings) => v3::Update::Settings(settings), - }; - - Self { - update_id, - meta, - enqueued_at, - } - } -} - -impl From for v3::Processed { - fn from(other: v2::Processed) -> Self { - let v2::Processed { - from, - success, - processed_at, - } = other; - - Self { - success, - processed_at, - from: from.into(), - } - } -} diff --git a/meilisearch-lib/src/dump/loaders/v3.rs b/meilisearch-lib/src/dump/loaders/v3.rs deleted file mode 100644 index 44984c946..000000000 --- a/meilisearch-lib/src/dump/loaders/v3.rs +++ /dev/null @@ -1,136 +0,0 @@ -use std::collections::HashMap; -use std::fs::{self, File}; -use std::io::{BufReader, BufWriter, Write}; -use std::path::Path; - -use anyhow::Context; -use fs_extra::dir::{self, CopyOptions}; -use log::info; -use tempfile::tempdir; -use uuid::Uuid; - -use crate::dump::compat::{self, v3}; -use crate::dump::Metadata; -use crate::index_resolver::meta_store::{DumpEntry, IndexMeta}; -use crate::options::IndexerOpts; -use crate::tasks::task::TaskId; - -/// dump structure for V3: -/// . 
-/// ├── indexes -/// │   └── 25f10bb8-6ea8-42f0-bd48-ad5857f77648 -/// │   ├── documents.jsonl -/// │   └── meta.json -/// ├── index_uuids -/// │   └── data.jsonl -/// ├── metadata.json -/// └── updates -/// └── data.jsonl - -pub fn load_dump( - meta: Metadata, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - meta_env_size: usize, - indexing_options: &IndexerOpts, -) -> anyhow::Result<()> { - info!("Patching dump V3 to dump V4..."); - - let patched_dir = tempdir()?; - - let options = CopyOptions::default(); - dir::copy(src.as_ref().join("indexes"), patched_dir.path(), &options)?; - dir::copy( - src.as_ref().join("index_uuids"), - patched_dir.path(), - &options, - )?; - - let uuid_map = patch_index_meta( - src.as_ref().join("index_uuids/data.jsonl"), - patched_dir.path(), - )?; - - fs::copy( - src.as_ref().join("metadata.json"), - patched_dir.path().join("metadata.json"), - )?; - - patch_updates(&src, patched_dir.path(), uuid_map)?; - - super::v4::load_dump( - meta, - patched_dir.path(), - dst, - index_db_size, - meta_env_size, - indexing_options, - ) -} - -fn patch_index_meta( - path: impl AsRef, - dst: impl AsRef, -) -> anyhow::Result> { - let file = BufReader::new(File::open(path)?); - let dst = dst.as_ref().join("index_uuids"); - fs::create_dir_all(&dst)?; - let mut dst_file = File::create(dst.join("data.jsonl"))?; - - let map = serde_json::Deserializer::from_reader(file) - .into_iter::() - .try_fold(HashMap::new(), |mut map, entry| -> anyhow::Result<_> { - let entry = entry?; - map.insert(entry.uuid, entry.uid.clone()); - let meta = IndexMeta { - uuid: entry.uuid, - // This is lost information, we patch it to 0; - creation_task_id: 0, - }; - let entry = DumpEntry { - uid: entry.uid, - index_meta: meta, - }; - serde_json::to_writer(&mut dst_file, &entry)?; - dst_file.write_all(b"\n")?; - Ok(map) - })?; - - dst_file.flush()?; - - Ok(map) -} - -fn patch_updates( - src: impl AsRef, - dst: impl AsRef, - uuid_map: HashMap, -) -> anyhow::Result<()> { - let dst = dst.as_ref().join("updates"); - fs::create_dir_all(&dst)?; - - let mut dst_file = BufWriter::new(File::create(dst.join("data.jsonl"))?); - let src_file = BufReader::new(File::open(src.as_ref().join("updates/data.jsonl"))?); - - serde_json::Deserializer::from_reader(src_file) - .into_iter::() - .enumerate() - .try_for_each(|(task_id, entry)| -> anyhow::Result<()> { - let entry = entry?; - let name = uuid_map - .get(&entry.uuid) - .with_context(|| format!("Unknown index uuid: {}", entry.uuid))? 
- .clone(); - serde_json::to_writer( - &mut dst_file, - &compat::v4::Task::from((entry.update, name, task_id as TaskId)), - )?; - dst_file.write_all(b"\n")?; - Ok(()) - })?; - - dst_file.flush()?; - - Ok(()) -} diff --git a/meilisearch-lib/src/dump/loaders/v4.rs b/meilisearch-lib/src/dump/loaders/v4.rs deleted file mode 100644 index 0744df7ea..000000000 --- a/meilisearch-lib/src/dump/loaders/v4.rs +++ /dev/null @@ -1,103 +0,0 @@ -use std::fs::{self, create_dir_all, File}; -use std::io::{BufReader, Write}; -use std::path::Path; - -use fs_extra::dir::{self, CopyOptions}; -use log::info; -use serde_json::{Deserializer, Map, Value}; -use tempfile::tempdir; -use uuid::Uuid; - -use crate::dump::{compat, Metadata}; -use crate::options::IndexerOpts; -use crate::tasks::task::Task; - -pub fn load_dump( - meta: Metadata, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - meta_env_size: usize, - indexing_options: &IndexerOpts, -) -> anyhow::Result<()> { - info!("Patching dump V4 to dump V5..."); - - let patched_dir = tempdir()?; - let options = CopyOptions::default(); - - // Indexes - dir::copy(src.as_ref().join("indexes"), &patched_dir, &options)?; - - // Index uuids - dir::copy(src.as_ref().join("index_uuids"), &patched_dir, &options)?; - - // Metadata - fs::copy( - src.as_ref().join("metadata.json"), - patched_dir.path().join("metadata.json"), - )?; - - // Updates - patch_updates(&src, &patched_dir)?; - - // Keys - patch_keys(&src, &patched_dir)?; - - super::v5::load_dump( - meta, - &patched_dir, - dst, - index_db_size, - meta_env_size, - indexing_options, - ) -} - -fn patch_updates(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - let updates_path = src.as_ref().join("updates/data.jsonl"); - let output_updates_path = dst.as_ref().join("updates/data.jsonl"); - create_dir_all(output_updates_path.parent().unwrap())?; - let udpates_file = File::open(updates_path)?; - let mut output_update_file = File::create(output_updates_path)?; - - serde_json::Deserializer::from_reader(udpates_file) - .into_iter::() - .try_for_each(|task| -> anyhow::Result<()> { - let task: Task = task?.into(); - - serde_json::to_writer(&mut output_update_file, &task)?; - output_update_file.write_all(b"\n")?; - - Ok(()) - })?; - - output_update_file.flush()?; - - Ok(()) -} - -fn patch_keys(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - let keys_file_src = src.as_ref().join("keys"); - - if !keys_file_src.exists() { - return Ok(()); - } - - fs::create_dir_all(&dst)?; - let keys_file_dst = dst.as_ref().join("keys"); - let mut writer = File::create(&keys_file_dst)?; - - let reader = BufReader::new(File::open(&keys_file_src)?); - for key in Deserializer::from_reader(reader).into_iter() { - let mut key: Map = key?; - - // generate a new uuid v4 and insert it in the key. 
- let uid = serde_json::to_value(Uuid::new_v4()).unwrap(); - key.insert("uid".to_string(), uid); - - serde_json::to_writer(&mut writer, &key)?; - writer.write_all(b"\n")?; - } - - Ok(()) -} diff --git a/meilisearch-lib/src/dump/loaders/v5.rs b/meilisearch-lib/src/dump/loaders/v5.rs deleted file mode 100644 index fcb4224bb..000000000 --- a/meilisearch-lib/src/dump/loaders/v5.rs +++ /dev/null @@ -1,47 +0,0 @@ -use std::{path::Path, sync::Arc}; - -use log::info; -use meilisearch_auth::AuthController; -use milli::heed::EnvOpenOptions; - -use crate::analytics; -use crate::dump::Metadata; -use crate::index_resolver::IndexResolver; -use crate::options::IndexerOpts; -use crate::tasks::TaskStore; -use crate::update_file_store::UpdateFileStore; - -pub fn load_dump( - meta: Metadata, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - meta_env_size: usize, - indexing_options: &IndexerOpts, -) -> anyhow::Result<()> { - info!( - "Loading dump from {}, dump database version: {}, dump version: V5", - meta.dump_date, meta.db_version - ); - - let mut options = EnvOpenOptions::new(); - options.map_size(meta_env_size); - options.max_dbs(100); - let env = Arc::new(options.open(&dst)?); - - IndexResolver::load_dump( - src.as_ref(), - &dst, - index_db_size, - env.clone(), - indexing_options, - )?; - UpdateFileStore::load_dump(src.as_ref(), &dst)?; - TaskStore::load_dump(&src, env)?; - AuthController::load_dump(&src, &dst)?; - analytics::copy_user_id(src.as_ref(), dst.as_ref()); - - info!("Loading indexes."); - - Ok(()) -} diff --git a/meilisearch-lib/src/dump/mod.rs b/meilisearch-lib/src/dump/mod.rs deleted file mode 100644 index 10a3216f2..000000000 --- a/meilisearch-lib/src/dump/mod.rs +++ /dev/null @@ -1,262 +0,0 @@ -use std::fs::File; -use std::path::Path; - -use anyhow::bail; -use log::info; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; - -use tempfile::TempDir; - -use crate::compression::from_tar_gz; -use crate::options::IndexerOpts; - -use self::loaders::{v2, v3, v4, v5}; - -pub use handler::{generate_uid, DumpHandler}; - -mod compat; -pub mod error; -mod handler; -mod loaders; - -const META_FILE_NAME: &str = "metadata.json"; - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct Metadata { - db_version: String, - index_db_size: usize, - update_db_size: usize, - #[serde(with = "time::serde::rfc3339")] - dump_date: OffsetDateTime, -} - -impl Metadata { - pub fn new(index_db_size: usize, update_db_size: usize) -> Self { - Self { - db_version: env!("CARGO_PKG_VERSION").to_string(), - index_db_size, - update_db_size, - dump_date: OffsetDateTime::now_utc(), - } - } -} - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct MetadataV1 { - pub db_version: String, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(tag = "dumpVersion")] -pub enum MetadataVersion { - V1(MetadataV1), - V2(Metadata), - V3(Metadata), - V4(Metadata), - // V5 is forward compatible with V4 but not backward compatible. - V5(Metadata), -} - -impl MetadataVersion { - pub fn load_dump( - self, - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - meta_env_size: usize, - indexing_options: &IndexerOpts, - ) -> anyhow::Result<()> { - match self { - MetadataVersion::V1(_meta) => { - anyhow::bail!("The version 1 of the dumps is not supported anymore. 
You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") - } - MetadataVersion::V2(meta) => v2::load_dump( - meta, - src, - dst, - index_db_size, - meta_env_size, - indexing_options, - )?, - MetadataVersion::V3(meta) => v3::load_dump( - meta, - src, - dst, - index_db_size, - meta_env_size, - indexing_options, - )?, - MetadataVersion::V4(meta) => v4::load_dump( - meta, - src, - dst, - index_db_size, - meta_env_size, - indexing_options, - )?, - MetadataVersion::V5(meta) => v5::load_dump( - meta, - src, - dst, - index_db_size, - meta_env_size, - indexing_options, - )?, - } - - Ok(()) - } - - pub fn new_v5(index_db_size: usize, update_db_size: usize) -> Self { - let meta = Metadata::new(index_db_size, update_db_size); - Self::V5(meta) - } - - pub fn db_version(&self) -> &str { - match self { - Self::V1(meta) => &meta.db_version, - Self::V2(meta) | Self::V3(meta) | Self::V4(meta) | Self::V5(meta) => &meta.db_version, - } - } - - pub fn version(&self) -> &'static str { - match self { - MetadataVersion::V1(_) => "V1", - MetadataVersion::V2(_) => "V2", - MetadataVersion::V3(_) => "V3", - MetadataVersion::V4(_) => "V4", - MetadataVersion::V5(_) => "V5", - } - } - - pub fn dump_date(&self) -> Option<&OffsetDateTime> { - match self { - MetadataVersion::V1(_) => None, - MetadataVersion::V2(meta) - | MetadataVersion::V3(meta) - | MetadataVersion::V4(meta) - | MetadataVersion::V5(meta) => Some(&meta.dump_date), - } - } -} - -#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] -#[serde(rename_all = "snake_case")] -pub enum DumpStatus { - Done, - InProgress, - Failed, -} - -pub fn load_dump( - dst_path: impl AsRef, - src_path: impl AsRef, - ignore_dump_if_db_exists: bool, - ignore_missing_dump: bool, - index_db_size: usize, - update_db_size: usize, - indexer_opts: &IndexerOpts, -) -> anyhow::Result<()> { - let empty_db = crate::is_empty_db(&dst_path); - let src_path_exists = src_path.as_ref().exists(); - - if empty_db && src_path_exists { - let (tmp_src, tmp_dst, meta) = extract_dump(&dst_path, &src_path)?; - meta.load_dump( - tmp_src.path(), - tmp_dst.path(), - index_db_size, - update_db_size, - indexer_opts, - )?; - persist_dump(&dst_path, tmp_dst)?; - Ok(()) - } else if !empty_db && !ignore_dump_if_db_exists { - bail!( - "database already exists at {:?}, try to delete it or rename it", - dst_path - .as_ref() - .canonicalize() - .unwrap_or_else(|_| dst_path.as_ref().to_owned()) - ) - } else if !src_path_exists && !ignore_missing_dump { - bail!("dump doesn't exist at {:?}", src_path.as_ref()) - } else { - // there is nothing to do - Ok(()) - } -} - -fn extract_dump( - dst_path: impl AsRef, - src_path: impl AsRef, -) -> anyhow::Result<(TempDir, TempDir, MetadataVersion)> { - // Setup a temp directory path in the same path as the database, to prevent cross devices - // references. 
- let temp_path = dst_path - .as_ref() - .parent() - .map(ToOwned::to_owned) - .unwrap_or_else(|| ".".into()); - - let tmp_src = tempfile::tempdir_in(temp_path)?; - let tmp_src_path = tmp_src.path(); - - from_tar_gz(&src_path, tmp_src_path)?; - - let meta_path = tmp_src_path.join(META_FILE_NAME); - let mut meta_file = File::open(&meta_path)?; - let meta: MetadataVersion = serde_json::from_reader(&mut meta_file)?; - - if !dst_path.as_ref().exists() { - std::fs::create_dir_all(dst_path.as_ref())?; - } - - let tmp_dst = tempfile::tempdir_in(dst_path.as_ref())?; - - info!( - "Loading dump {}, dump database version: {}, dump version: {}", - meta.dump_date() - .map(|t| format!("from {}", t)) - .unwrap_or_else(String::new), - meta.db_version(), - meta.version() - ); - - Ok((tmp_src, tmp_dst, meta)) -} - -fn persist_dump(dst_path: impl AsRef, tmp_dst: TempDir) -> anyhow::Result<()> { - let persisted_dump = tmp_dst.into_path(); - - // Delete everything in the `data.ms` except the tempdir. - if dst_path.as_ref().exists() { - for file in dst_path.as_ref().read_dir().unwrap() { - let file = file.unwrap().path(); - if file.file_name() == persisted_dump.file_name() { - continue; - } - - if file.is_file() { - std::fs::remove_file(&file)?; - } else { - std::fs::remove_dir_all(&file)?; - } - } - } - - // Move the whole content of the tempdir into the `data.ms`. - for file in persisted_dump.read_dir().unwrap() { - let file = file.unwrap().path(); - - std::fs::rename(&file, &dst_path.as_ref().join(file.file_name().unwrap()))?; - } - - // Delete the empty tempdir. - std::fs::remove_dir_all(&persisted_dump)?; - - Ok(()) -} diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs deleted file mode 100644 index 16111a191..000000000 --- a/meilisearch-lib/src/error.rs +++ /dev/null @@ -1,55 +0,0 @@ -use std::error::Error; -use std::fmt; - -use meilisearch_types::error::{Code, ErrorCode}; -use milli::UserError; - -#[derive(Debug)] -pub struct MilliError<'a>(pub &'a milli::Error); - -impl Error for MilliError<'_> {} - -impl fmt::Display for MilliError<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -impl ErrorCode for MilliError<'_> { - fn error_code(&self) -> Code { - match self.0 { - milli::Error::InternalError(_) => Code::Internal, - milli::Error::IoError(_) => Code::Internal, - milli::Error::UserError(ref error) => { - match error { - // TODO: wait for spec for new error codes. - UserError::SerdeJson(_) - | UserError::InvalidLmdbOpenOptions - | UserError::DocumentLimitReached - | UserError::AccessingSoftDeletedDocument { .. } - | UserError::UnknownInternalDocumentId { .. } => Code::Internal, - UserError::InvalidStoreFile => Code::InvalidStore, - UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice, - UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached, - UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, - UserError::InvalidFilter(_) => Code::Filter, - UserError::MissingDocumentId { .. } => Code::MissingDocumentId, - UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { - Code::InvalidDocumentId - } - UserError::MissingPrimaryKey => Code::MissingPrimaryKey, - UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent, - UserError::SortRankingRuleMissing => Code::Sort, - UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, - UserError::InvalidSortableAttribute { .. 
} => Code::Sort, - UserError::CriterionError(_) => Code::InvalidRankingRule, - UserError::InvalidGeoField { .. } => Code::InvalidGeoField, - UserError::SortError(_) => Code::Sort, - UserError::InvalidMinTypoWordLenSetting(_, _) => { - Code::InvalidMinWordLengthForTypo - } - } - } - } - } -} diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs deleted file mode 100644 index 6a41fa7a0..000000000 --- a/meilisearch-lib/src/index/dump.rs +++ /dev/null @@ -1,160 +0,0 @@ -use std::fs::{create_dir_all, File}; -use std::io::{BufReader, Seek, SeekFrom, Write}; -use std::path::Path; - -use anyhow::Context; -use indexmap::IndexMap; -use milli::documents::DocumentsBatchReader; -use milli::heed::{EnvOpenOptions, RoTxn}; -use milli::update::{IndexDocumentsConfig, IndexerConfig}; -use serde::{Deserialize, Serialize}; - -use crate::document_formats::read_ndjson; -use crate::index::updates::apply_settings_to_builder; - -use super::error::Result; -use super::{index::Index, Settings, Unchecked}; - -#[derive(Serialize, Deserialize)] -struct DumpMeta { - settings: Settings, - primary_key: Option, -} - -const META_FILE_NAME: &str = "meta.json"; -const DATA_FILE_NAME: &str = "documents.jsonl"; - -impl Index { - pub fn dump(&self, path: impl AsRef) -> Result<()> { - // acquire write txn make sure any ongoing write is finished before we start. - let txn = self.write_txn()?; - let path = path.as_ref().join(format!("indexes/{}", self.uuid)); - - create_dir_all(&path)?; - - self.dump_documents(&txn, &path)?; - self.dump_meta(&txn, &path)?; - - Ok(()) - } - - fn dump_documents(&self, txn: &RoTxn, path: impl AsRef) -> Result<()> { - let document_file_path = path.as_ref().join(DATA_FILE_NAME); - let mut document_file = File::create(&document_file_path)?; - - let documents = self.all_documents(txn)?; - let fields_ids_map = self.fields_ids_map(txn)?; - - // dump documents - let mut json_map = IndexMap::new(); - for document in documents { - let (_, reader) = document?; - - for (fid, bytes) in reader.iter() { - if let Some(name) = fields_ids_map.name(fid) { - json_map.insert(name, serde_json::from_slice::(bytes)?); - } - } - - serde_json::to_writer(&mut document_file, &json_map)?; - document_file.write_all(b"\n")?; - - json_map.clear(); - } - - Ok(()) - } - - fn dump_meta(&self, txn: &RoTxn, path: impl AsRef) -> Result<()> { - let meta_file_path = path.as_ref().join(META_FILE_NAME); - let mut meta_file = File::create(&meta_file_path)?; - - let settings = self.settings_txn(txn)?.into_unchecked(); - let primary_key = self.primary_key(txn)?.map(String::from); - let meta = DumpMeta { - settings, - primary_key, - }; - - serde_json::to_writer(&mut meta_file, &meta)?; - - Ok(()) - } - - pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - size: usize, - indexer_config: &IndexerConfig, - ) -> anyhow::Result<()> { - let dir_name = src - .as_ref() - .file_name() - .with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?; - - let dst_dir_path = dst.as_ref().join("indexes").join(dir_name); - create_dir_all(&dst_dir_path)?; - - let meta_path = src.as_ref().join(META_FILE_NAME); - let meta_file = File::open(meta_path)?; - let DumpMeta { - settings, - primary_key, - } = serde_json::from_reader(meta_file)?; - let settings = settings.check(); - - let mut options = EnvOpenOptions::new(); - options.map_size(size); - let index = milli::Index::new(options, &dst_dir_path)?; - - let mut txn = index.write_txn()?; - - // Apply settings first - let mut builder = milli::update::Settings::new(&mut 
txn, &index, indexer_config); - - if let Some(primary_key) = primary_key { - builder.set_primary_key(primary_key); - } - - apply_settings_to_builder(&settings, &mut builder); - - builder.execute(|_| ())?; - - let document_file_path = src.as_ref().join(DATA_FILE_NAME); - let reader = BufReader::new(File::open(&document_file_path)?); - - let mut tmp_doc_file = tempfile::tempfile()?; - - let empty = match read_ndjson(reader, &mut tmp_doc_file) { - // if there was no document in the file it's because the index was empty - Ok(0) => true, - Ok(_) => false, - Err(e) => return Err(e.into()), - }; - - if !empty { - tmp_doc_file.seek(SeekFrom::Start(0))?; - - let documents_reader = DocumentsBatchReader::from_reader(tmp_doc_file)?; - - //If the document file is empty, we don't perform the document addition, to prevent - //a primary key error to be thrown. - let config = IndexDocumentsConfig::default(); - let builder = milli::update::IndexDocuments::new( - &mut txn, - &index, - indexer_config, - config, - |_| (), - )?; - let (builder, user_error) = builder.add_documents(documents_reader)?; - user_error?; - builder.execute()?; - } - - txn.commit()?; - index.prepare_for_closing().wait(); - - Ok(()) - } -} diff --git a/meilisearch-lib/src/index/error.rs b/meilisearch-lib/src/index/error.rs deleted file mode 100644 index f795ceaa4..000000000 --- a/meilisearch-lib/src/index/error.rs +++ /dev/null @@ -1,61 +0,0 @@ -use std::error::Error; - -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::internal_error; -use serde_json::Value; - -use crate::{error::MilliError, update_file_store}; - -pub type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -pub enum IndexError { - #[error("An internal error has occurred. `{0}`.")] - Internal(Box), - #[error("Document `{0}` not found.")] - DocumentNotFound(String), - #[error("{0}")] - Facet(#[from] FacetError), - #[error("{0}")] - Milli(#[from] milli::Error), -} - -internal_error!( - IndexError: std::io::Error, - milli::heed::Error, - fst::Error, - serde_json::Error, - update_file_store::UpdateFileStoreError, - milli::documents::Error -); - -impl ErrorCode for IndexError { - fn error_code(&self) -> Code { - match self { - IndexError::Internal(_) => Code::Internal, - IndexError::DocumentNotFound(_) => Code::DocumentNotFound, - IndexError::Facet(e) => e.error_code(), - IndexError::Milli(e) => MilliError(e).error_code(), - } - } -} - -impl From for IndexError { - fn from(error: milli::UserError) -> IndexError { - IndexError::Milli(error.into()) - } -} - -#[derive(Debug, thiserror::Error)] -pub enum FacetError { - #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] - InvalidExpression(&'static [&'static str], Value), -} - -impl ErrorCode for FacetError { - fn error_code(&self) -> Code { - match self { - FacetError::InvalidExpression(_, _) => Code::Filter, - } - } -} diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs deleted file mode 100644 index 02425d0bf..000000000 --- a/meilisearch-lib/src/index/index.rs +++ /dev/null @@ -1,332 +0,0 @@ -use std::collections::BTreeSet; -use std::fs::create_dir_all; -use std::marker::PhantomData; -use std::ops::Deref; -use std::path::Path; -use std::sync::Arc; - -use fst::IntoStreamer; -use milli::heed::{CompactionOption, EnvOpenOptions, RoTxn}; -use milli::update::{IndexerConfig, Setting}; -use milli::{obkv_to_json, FieldDistribution, DEFAULT_VALUES_PER_FACET}; -use serde::{Deserialize, Serialize}; -use serde_json::{Map, 
Value}; -use time::OffsetDateTime; -use uuid::Uuid; -use walkdir::WalkDir; - -use crate::index::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS; - -use super::error::IndexError; -use super::error::Result; -use super::updates::{FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, TypoSettings}; -use super::{Checked, Settings}; - -pub type Document = Map; - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct IndexMeta { - #[serde(with = "time::serde::rfc3339")] - pub created_at: OffsetDateTime, - #[serde(with = "time::serde::rfc3339")] - pub updated_at: OffsetDateTime, - pub primary_key: Option, -} - -impl IndexMeta { - pub fn new(index: &Index) -> Result { - let txn = index.read_txn()?; - Self::new_txn(index, &txn) - } - - pub fn new_txn(index: &Index, txn: &milli::heed::RoTxn) -> Result { - let created_at = index.created_at(txn)?; - let updated_at = index.updated_at(txn)?; - let primary_key = index.primary_key(txn)?.map(String::from); - Ok(Self { - created_at, - updated_at, - primary_key, - }) - } -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct IndexStats { - #[serde(skip)] - pub size: u64, - pub number_of_documents: u64, - /// Whether the current index is performing an update. It is initially `None` when the - /// index returns it, since it is the `UpdateStore` that knows what index is currently indexing. It is - /// later set to either true or false, we we retrieve the information from the `UpdateStore` - pub is_indexing: Option, - pub field_distribution: FieldDistribution, -} - -#[derive(Clone, derivative::Derivative)] -#[derivative(Debug)] -pub struct Index { - pub uuid: Uuid, - #[derivative(Debug = "ignore")] - pub inner: Arc, - #[derivative(Debug = "ignore")] - pub indexer_config: Arc, -} - -impl Deref for Index { - type Target = milli::Index; - - fn deref(&self) -> &Self::Target { - self.inner.as_ref() - } -} - -impl Index { - pub fn open( - path: impl AsRef, - size: usize, - uuid: Uuid, - update_handler: Arc, - ) -> Result { - log::debug!("opening index in {}", path.as_ref().display()); - create_dir_all(&path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(size); - let inner = Arc::new(milli::Index::new(options, &path)?); - Ok(Index { - inner, - uuid, - indexer_config: update_handler, - }) - } - - /// Asynchronously close the underlying index - pub fn close(self) { - self.inner.as_ref().clone().prepare_for_closing(); - } - - pub fn stats(&self) -> Result { - let rtxn = self.read_txn()?; - - Ok(IndexStats { - size: self.size(), - number_of_documents: self.number_of_documents(&rtxn)?, - is_indexing: None, - field_distribution: self.field_distribution(&rtxn)?, - }) - } - - pub fn meta(&self) -> Result { - IndexMeta::new(self) - } - pub fn settings(&self) -> Result> { - let txn = self.read_txn()?; - self.settings_txn(&txn) - } - - pub fn uuid(&self) -> Uuid { - self.uuid - } - - pub fn settings_txn(&self, txn: &RoTxn) -> Result> { - let displayed_attributes = self - .displayed_fields(txn)? - .map(|fields| fields.into_iter().map(String::from).collect()); - - let searchable_attributes = self - .user_defined_searchable_fields(txn)? - .map(|fields| fields.into_iter().map(String::from).collect()); - - let filterable_attributes = self.filterable_fields(txn)?.into_iter().collect(); - - let sortable_attributes = self.sortable_fields(txn)?.into_iter().collect(); - - let criteria = self - .criteria(txn)? 
- .into_iter() - .map(|c| c.to_string()) - .collect(); - - let stop_words = self - .stop_words(txn)? - .map(|stop_words| -> Result> { - Ok(stop_words.stream().into_strs()?.into_iter().collect()) - }) - .transpose()? - .unwrap_or_default(); - let distinct_field = self.distinct_field(txn)?.map(String::from); - - // in milli each word in the synonyms map were split on their separator. Since we lost - // this information we are going to put space between words. - let synonyms = self - .synonyms(txn)? - .iter() - .map(|(key, values)| { - ( - key.join(" "), - values.iter().map(|value| value.join(" ")).collect(), - ) - }) - .collect(); - - let min_typo_word_len = MinWordSizeTyposSetting { - one_typo: Setting::Set(self.min_word_len_one_typo(txn)?), - two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), - }; - - let disabled_words = match self.exact_words(txn)? { - Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(), - None => BTreeSet::new(), - }; - - let disabled_attributes = self - .exact_attributes(txn)? - .into_iter() - .map(String::from) - .collect(); - - let typo_tolerance = TypoSettings { - enabled: Setting::Set(self.authorize_typos(txn)?), - min_word_size_for_typos: Setting::Set(min_typo_word_len), - disable_on_words: Setting::Set(disabled_words), - disable_on_attributes: Setting::Set(disabled_attributes), - }; - - let faceting = FacetingSettings { - max_values_per_facet: Setting::Set( - self.max_values_per_facet(txn)? - .unwrap_or(DEFAULT_VALUES_PER_FACET), - ), - }; - - let pagination = PaginationSettings { - max_total_hits: Setting::Set( - self.pagination_max_total_hits(txn)? - .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), - ), - }; - - Ok(Settings { - displayed_attributes: match displayed_attributes { - Some(attrs) => Setting::Set(attrs), - None => Setting::Reset, - }, - searchable_attributes: match searchable_attributes { - Some(attrs) => Setting::Set(attrs), - None => Setting::Reset, - }, - filterable_attributes: Setting::Set(filterable_attributes), - sortable_attributes: Setting::Set(sortable_attributes), - ranking_rules: Setting::Set(criteria), - stop_words: Setting::Set(stop_words), - distinct_attribute: match distinct_field { - Some(field) => Setting::Set(field), - None => Setting::Reset, - }, - synonyms: Setting::Set(synonyms), - typo_tolerance: Setting::Set(typo_tolerance), - faceting: Setting::Set(faceting), - pagination: Setting::Set(pagination), - _kind: PhantomData, - }) - } - - /// Return the total number of documents contained in the index + the selected documents. 
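The `settings_txn` method above folds every stored option into milli's `Setting` enum: a value found in the index becomes `Set(...)`, while an absent optional field becomes `Reset` (fall back to the default). A minimal sketch of that mapping pattern, using a stand-in `Setting` enum in place of the real `milli::update::Setting`:

```rust
// Stand-in for milli::update::Setting, only for illustration.
#[derive(Debug, PartialEq)]
enum Setting<T> {
    Set(T),
    Reset,
}

// A field stored in the index maps to `Set(...)`; an absent field maps to
// `Reset`, meaning "fall back to the default".
fn displayed_to_setting(displayed: Option<Vec<String>>) -> Setting<Vec<String>> {
    match displayed {
        Some(fields) => Setting::Set(fields),
        None => Setting::Reset,
    }
}

fn main() {
    assert_eq!(displayed_to_setting(None), Setting::Reset);
    assert_eq!(
        displayed_to_setting(Some(vec!["title".to_string()])),
        Setting::Set(vec!["title".to_string()])
    );
    println!("ok");
}
```

The same shape is repeated above for `searchable_attributes` and `distinct_attribute`.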
- pub fn retrieve_documents>( - &self, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result<(u64, Vec)> { - let txn = self.read_txn()?; - - let fields_ids_map = self.fields_ids_map(&txn)?; - let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - - let mut documents = Vec::new(); - for entry in self.all_documents(&txn)?.skip(offset).take(limit) { - let (_id, obkv) = entry?; - let document = obkv_to_json(&all_fields, &fields_ids_map, obkv)?; - let document = match &attributes_to_retrieve { - Some(attributes_to_retrieve) => permissive_json_pointer::select_values( - &document, - attributes_to_retrieve.iter().map(|s| s.as_ref()), - ), - None => document, - }; - documents.push(document); - } - - let number_of_documents = self.number_of_documents(&txn)?; - - Ok((number_of_documents, documents)) - } - - pub fn retrieve_document>( - &self, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - let txn = self.read_txn()?; - - let fields_ids_map = self.fields_ids_map(&txn)?; - let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - - let internal_id = self - .external_documents_ids(&txn)? - .get(doc_id.as_bytes()) - .ok_or_else(|| IndexError::DocumentNotFound(doc_id.clone()))?; - - let document = self - .documents(&txn, std::iter::once(internal_id))? - .into_iter() - .next() - .map(|(_, d)| d) - .ok_or(IndexError::DocumentNotFound(doc_id))?; - - let document = obkv_to_json(&all_fields, &fields_ids_map, document)?; - let document = match &attributes_to_retrieve { - Some(attributes_to_retrieve) => permissive_json_pointer::select_values( - &document, - attributes_to_retrieve.iter().map(|s| s.as_ref()), - ), - None => document, - }; - - Ok(document) - } - - pub fn size(&self) -> u64 { - WalkDir::new(self.path()) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| entry.metadata().ok()) - .filter(|metadata| metadata.is_file()) - .fold(0, |acc, m| acc + m.len()) - } - - pub fn snapshot(&self, path: impl AsRef) -> Result<()> { - let mut dst = path.as_ref().join(format!("indexes/{}/", self.uuid)); - create_dir_all(&dst)?; - dst.push("data.mdb"); - let _txn = self.write_txn()?; - self.inner.copy_to_path(dst, CompactionOption::Enabled)?; - Ok(()) - } -} - -/// When running tests, when a server instance is dropped, the environment is not actually closed, -/// leaving a lot of open file descriptors. 
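The `Drop` implementation that follows closes the underlying heed environment only when the very last clone of the index is being dropped, which it detects with `Arc::strong_count`. A self-contained sketch of that last-owner pattern, with a plain `Arc<String>` standing in for `Arc<milli::Index>` and a print standing in for `prepare_for_closing`:

```rust
use std::sync::Arc;

struct Index {
    inner: Arc<String>, // stands in for Arc<milli::Index>
}

impl Clone for Index {
    fn clone(&self) -> Self {
        Index { inner: self.inner.clone() }
    }
}

impl Drop for Index {
    fn drop(&mut self) {
        // Only the very last owner of the inner Arc triggers the close,
        // mirroring the strong_count check in the code below.
        if Arc::strong_count(&self.inner) == 1 {
            println!("closing environment for {}", self.inner);
        }
    }
}

fn main() {
    let first = Index { inner: Arc::new("movies".to_string()) };
    let second = first.clone();
    drop(first);  // strong_count is still 2 here, nothing happens
    drop(second); // last owner: the close message is printed exactly once
}
```

Dropping `first` leaves the count at 2, so nothing happens; only dropping the last handle triggers the close, and it triggers it once.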
-impl Drop for Index { - fn drop(&mut self) { - // When dropping the last instance of an index, we want to close the index - // Note that the close is actually performed only if all the instances a effectively - // dropped - - if Arc::strong_count(&self.inner) == 1 { - self.inner.as_ref().clone().prepare_for_closing(); - } - } -} diff --git a/meilisearch-lib/src/index/mod.rs b/meilisearch-lib/src/index/mod.rs deleted file mode 100644 index 98c25366d..000000000 --- a/meilisearch-lib/src/index/mod.rs +++ /dev/null @@ -1,249 +0,0 @@ -pub use search::{ - MatchingStrategy, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, - DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, -}; -pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked}; - -mod dump; -pub mod error; -mod search; -pub mod updates; - -#[allow(clippy::module_inception)] -mod index; - -pub use index::{Document, IndexMeta, IndexStats}; - -#[cfg(not(test))] -pub use index::Index; - -#[cfg(test)] -pub use test::MockIndex as Index; - -/// The index::test module provides means of mocking an index instance. I can be used throughout the -/// code for unit testing, in places where an index would normally be used. -#[cfg(test)] -pub mod test { - use std::path::{Path, PathBuf}; - use std::sync::Arc; - - use milli::update::{ - DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsMethod, IndexerConfig, - }; - use nelson::Mocker; - use uuid::Uuid; - - use super::error::Result; - use super::index::Index; - use super::Document; - use super::{Checked, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings}; - use crate::update_file_store::UpdateFileStore; - - #[derive(Clone)] - pub enum MockIndex { - Real(Index), - Mock(Arc), - } - - impl MockIndex { - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(Arc::new(mocker)) - } - - pub fn open( - path: impl AsRef, - size: usize, - uuid: Uuid, - update_handler: Arc, - ) -> Result { - let index = Index::open(path, size, uuid, update_handler)?; - Ok(Self::Real(index)) - } - - pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - size: usize, - update_handler: &IndexerConfig, - ) -> anyhow::Result<()> { - Index::load_dump(src, dst, size, update_handler) - } - - pub fn uuid(&self) -> Uuid { - match self { - MockIndex::Real(index) => index.uuid(), - MockIndex::Mock(m) => unsafe { m.get("uuid").call(()) }, - } - } - - pub fn stats(&self) -> Result { - match self { - MockIndex::Real(index) => index.stats(), - MockIndex::Mock(m) => unsafe { m.get("stats").call(()) }, - } - } - - pub fn meta(&self) -> Result { - match self { - MockIndex::Real(index) => index.meta(), - MockIndex::Mock(_) => todo!(), - } - } - pub fn settings(&self) -> Result> { - match self { - MockIndex::Real(index) => index.settings(), - MockIndex::Mock(_) => todo!(), - } - } - - pub fn retrieve_documents>( - &self, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result<(u64, Vec)> { - match self { - MockIndex::Real(index) => { - index.retrieve_documents(offset, limit, attributes_to_retrieve) - } - MockIndex::Mock(_) => todo!(), - } - } - - pub fn retrieve_document>( - &self, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - match self { - MockIndex::Real(index) => index.retrieve_document(doc_id, attributes_to_retrieve), - MockIndex::Mock(_) => todo!(), - } - } - - pub fn size(&self) -> u64 { - match self { - MockIndex::Real(index) => index.size(), - MockIndex::Mock(_) => todo!(), - } - } - - pub fn 
snapshot(&self, path: impl AsRef) -> Result<()> { - match self { - MockIndex::Real(index) => index.snapshot(path), - MockIndex::Mock(m) => unsafe { m.get("snapshot").call(path.as_ref()) }, - } - } - - pub fn close(self) { - match self { - MockIndex::Real(index) => index.close(), - MockIndex::Mock(m) => unsafe { m.get("close").call(()) }, - } - } - - pub fn perform_search(&self, query: SearchQuery) -> Result { - match self { - MockIndex::Real(index) => index.perform_search(query), - MockIndex::Mock(m) => unsafe { m.get("perform_search").call(query) }, - } - } - - pub fn dump(&self, path: impl AsRef) -> Result<()> { - match self { - MockIndex::Real(index) => index.dump(path), - MockIndex::Mock(m) => unsafe { m.get("dump").call(path.as_ref()) }, - } - } - - pub fn update_documents( - &self, - method: IndexDocumentsMethod, - primary_key: Option, - file_store: UpdateFileStore, - contents: impl Iterator, - ) -> Result>> { - match self { - MockIndex::Real(index) => { - index.update_documents(method, primary_key, file_store, contents) - } - MockIndex::Mock(mocker) => unsafe { - mocker - .get("update_documents") - .call((method, primary_key, file_store, contents)) - }, - } - } - - pub fn update_settings(&self, settings: &Settings) -> Result<()> { - match self { - MockIndex::Real(index) => index.update_settings(settings), - MockIndex::Mock(m) => unsafe { m.get("update_settings").call(settings) }, - } - } - - pub fn update_primary_key(&self, primary_key: String) -> Result { - match self { - MockIndex::Real(index) => index.update_primary_key(primary_key), - MockIndex::Mock(m) => unsafe { m.get("update_primary_key").call(primary_key) }, - } - } - - pub fn delete_documents(&self, ids: &[String]) -> Result { - match self { - MockIndex::Real(index) => index.delete_documents(ids), - MockIndex::Mock(m) => unsafe { m.get("delete_documents").call(ids) }, - } - } - - pub fn clear_documents(&self) -> Result<()> { - match self { - MockIndex::Real(index) => index.clear_documents(), - MockIndex::Mock(m) => unsafe { m.get("clear_documents").call(()) }, - } - } - } - - #[test] - fn test_faux_index() { - let faux = Mocker::default(); - faux.when("snapshot") - .times(2) - .then(|_: &Path| -> Result<()> { Ok(()) }); - - let index = MockIndex::mock(faux); - - let path = PathBuf::from("hello"); - index.snapshot(&path).unwrap(); - index.snapshot(&path).unwrap(); - } - - #[test] - #[should_panic] - fn test_faux_unexisting_method_stub() { - let faux = Mocker::default(); - - let index = MockIndex::mock(faux); - - let path = PathBuf::from("hello"); - index.snapshot(&path).unwrap(); - index.snapshot(&path).unwrap(); - } - - #[test] - #[should_panic] - fn test_faux_panic() { - let faux = Mocker::default(); - faux.when("snapshot") - .times(2) - .then(|_: &Path| -> Result<()> { - panic!(); - }); - - let index = MockIndex::mock(faux); - - let path = PathBuf::from("hello"); - index.snapshot(&path).unwrap(); - index.snapshot(&path).unwrap(); - } -} diff --git a/meilisearch-lib/src/index_controller/error.rs b/meilisearch-lib/src/index_controller/error.rs deleted file mode 100644 index ab2dd142d..000000000 --- a/meilisearch-lib/src/index_controller/error.rs +++ /dev/null @@ -1,72 +0,0 @@ -use std::error::Error; - -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::index_uid::IndexUidFormatError; -use meilisearch_types::internal_error; -use tokio::task::JoinError; - -use super::DocumentAdditionFormat; -use crate::document_formats::DocumentFormatError; -use crate::dump::error::DumpError; -use 
crate::index::error::IndexError; -use crate::tasks::error::TaskError; -use crate::update_file_store::UpdateFileStoreError; - -use crate::index_resolver::error::IndexResolverError; - -pub type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -pub enum IndexControllerError { - #[error("Index creation must have an uid")] - MissingUid, - #[error("{0}")] - IndexResolver(#[from] IndexResolverError), - #[error("{0}")] - IndexError(#[from] IndexError), - #[error("An internal error has occurred. `{0}`.")] - Internal(Box), - #[error("{0}")] - TaskError(#[from] TaskError), - #[error("{0}")] - DumpError(#[from] DumpError), - #[error("{0}")] - DocumentFormatError(#[from] DocumentFormatError), - #[error("A {0} payload is missing.")] - MissingPayload(DocumentAdditionFormat), - #[error("The provided payload reached the size limit.")] - PayloadTooLarge, -} - -internal_error!(IndexControllerError: JoinError, UpdateFileStoreError); - -impl From for IndexControllerError { - fn from(other: actix_web::error::PayloadError) -> Self { - match other { - actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge, - _ => Self::Internal(Box::new(other)), - } - } -} - -impl ErrorCode for IndexControllerError { - fn error_code(&self) -> Code { - match self { - IndexControllerError::MissingUid => Code::BadRequest, - IndexControllerError::IndexResolver(e) => e.error_code(), - IndexControllerError::IndexError(e) => e.error_code(), - IndexControllerError::Internal(_) => Code::Internal, - IndexControllerError::TaskError(e) => e.error_code(), - IndexControllerError::DocumentFormatError(e) => e.error_code(), - IndexControllerError::MissingPayload(_) => Code::MissingPayload, - IndexControllerError::PayloadTooLarge => Code::PayloadTooLarge, - IndexControllerError::DumpError(e) => e.error_code(), - } - } -} - -impl From for IndexControllerError { - fn from(err: IndexUidFormatError) -> Self { - IndexResolverError::from(err).into() - } -} diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs deleted file mode 100644 index be855300b..000000000 --- a/meilisearch-lib/src/index_controller/mod.rs +++ /dev/null @@ -1,779 +0,0 @@ -use meilisearch_auth::SearchRules; -use std::collections::BTreeMap; -use std::fmt; -use std::io::Cursor; -use std::path::{Path, PathBuf}; -use std::str::FromStr; -use std::sync::Arc; -use std::time::Duration; - -use actix_web::error::PayloadError; -use bytes::Bytes; -use futures::Stream; -use futures::StreamExt; -use meilisearch_types::index_uid::IndexUid; -use milli::update::IndexDocumentsMethod; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use tokio::sync::RwLock; -use tokio::task::spawn_blocking; -use tokio::time::sleep; -use uuid::Uuid; - -use crate::document_formats::{read_csv, read_json, read_ndjson}; -use crate::dump::{self, load_dump, DumpHandler}; -use crate::index::{ - Checked, Document, IndexMeta, IndexStats, SearchQuery, SearchResult, Settings, Unchecked, -}; -use crate::index_resolver::error::IndexResolverError; -use crate::options::{IndexerOpts, SchedulerConfig}; -use crate::snapshot::{load_snapshot, SnapshotService}; -use crate::tasks::error::TaskError; -use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskId}; -use crate::tasks::{ - BatchHandler, EmptyBatchHandler, Scheduler, SnapshotHandler, TaskFilter, TaskStore, -}; -use error::Result; - -use self::error::IndexControllerError; -use crate::index_resolver::index_store::{IndexStore, MapIndexStore}; -use 
crate::index_resolver::meta_store::{HeedMetaStore, IndexMetaStore}; -use crate::index_resolver::{create_index_resolver, IndexResolver}; -use crate::update_file_store::UpdateFileStore; - -pub mod error; -pub mod versioning; - -/// Concrete implementation of the IndexController, exposed by meilisearch-lib -pub type MeiliSearch = IndexController; - -pub type Payload = Box< - dyn Stream> + Send + Sync + 'static + Unpin, ->; - -pub fn open_meta_env(path: &Path, size: usize) -> milli::heed::Result { - let mut options = milli::heed::EnvOpenOptions::new(); - options.map_size(size); - options.max_dbs(20); - options.open(path) -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -#[serde(rename_all = "camelCase")] -pub struct IndexMetadata { - #[serde(skip)] - pub uuid: Uuid, - pub uid: String, - #[serde(flatten)] - pub meta: IndexMeta, -} - -#[derive(Clone, Debug)] -pub struct IndexSettings { - pub uid: Option, - pub primary_key: Option, -} - -pub struct IndexController { - pub index_resolver: Arc>, - scheduler: Arc>, - task_store: TaskStore, - pub update_file_store: UpdateFileStore, -} - -/// Need a custom implementation for clone because deriving require that U and I are clone. -impl Clone for IndexController { - fn clone(&self) -> Self { - Self { - index_resolver: self.index_resolver.clone(), - scheduler: self.scheduler.clone(), - update_file_store: self.update_file_store.clone(), - task_store: self.task_store.clone(), - } - } -} - -#[derive(Debug)] -pub enum DocumentAdditionFormat { - Json, - Csv, - Ndjson, -} - -impl fmt::Display for DocumentAdditionFormat { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - DocumentAdditionFormat::Json => write!(f, "json"), - DocumentAdditionFormat::Ndjson => write!(f, "ndjson"), - DocumentAdditionFormat::Csv => write!(f, "csv"), - } - } -} - -#[derive(Serialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct Stats { - pub database_size: u64, - #[serde(serialize_with = "time::serde::rfc3339::option::serialize")] - pub last_update: Option, - pub indexes: BTreeMap, -} - -#[allow(clippy::large_enum_variant)] -#[derive(derivative::Derivative)] -#[derivative(Debug)] -pub enum Update { - DeleteDocuments(Vec), - ClearDocuments, - Settings { - settings: Settings, - /// Indicates whether the update was a deletion - is_deletion: bool, - allow_index_creation: bool, - }, - DocumentAddition { - #[derivative(Debug = "ignore")] - payload: Payload, - primary_key: Option, - method: IndexDocumentsMethod, - format: DocumentAdditionFormat, - allow_index_creation: bool, - }, - DeleteIndex, - CreateIndex { - primary_key: Option, - }, - UpdateIndex { - primary_key: Option, - }, -} - -#[derive(Default, Debug)] -pub struct IndexControllerBuilder { - max_index_size: Option, - max_task_store_size: Option, - snapshot_dir: Option, - import_snapshot: Option, - snapshot_interval: Option, - ignore_snapshot_if_db_exists: bool, - ignore_missing_snapshot: bool, - schedule_snapshot: bool, - dump_src: Option, - dump_dst: Option, - ignore_dump_if_db_exists: bool, - ignore_missing_dump: bool, -} - -impl IndexControllerBuilder { - pub fn build( - self, - db_path: impl AsRef, - indexer_options: IndexerOpts, - scheduler_config: SchedulerConfig, - ) -> anyhow::Result { - let index_size = self - .max_index_size - .ok_or_else(|| anyhow::anyhow!("Missing index size"))?; - let task_store_size = self - .max_task_store_size - .ok_or_else(|| anyhow::anyhow!("Missing update database size"))?; - - if let Some(ref path) = self.import_snapshot { - log::info!("Loading from 
snapshot {:?}", path); - load_snapshot( - db_path.as_ref(), - path, - self.ignore_snapshot_if_db_exists, - self.ignore_missing_snapshot, - )?; - } else if let Some(ref src_path) = self.dump_src { - load_dump( - db_path.as_ref(), - src_path, - self.ignore_dump_if_db_exists, - self.ignore_missing_dump, - index_size, - task_store_size, - &indexer_options, - )?; - } else if db_path.as_ref().exists() { - // Directory could be pre-created without any database in. - let db_is_empty = db_path.as_ref().read_dir()?.next().is_none(); - if !db_is_empty { - versioning::check_version_file(db_path.as_ref())?; - } - } - - std::fs::create_dir_all(db_path.as_ref())?; - - let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?); - - let update_file_store = UpdateFileStore::new(&db_path)?; - // Create or overwrite the version file for this DB - versioning::create_version_file(db_path.as_ref())?; - - let index_resolver = Arc::new(create_index_resolver( - &db_path, - index_size, - &indexer_options, - meta_env.clone(), - update_file_store.clone(), - )?); - - let dump_path = self - .dump_dst - .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; - - let dump_handler = Arc::new(DumpHandler::new( - dump_path, - db_path.as_ref().into(), - update_file_store.clone(), - task_store_size, - index_size, - meta_env.clone(), - index_resolver.clone(), - )); - let task_store = TaskStore::new(meta_env)?; - - // register all the batch handlers for use with the scheduler. - let handlers: Vec> = vec![ - index_resolver.clone(), - dump_handler, - Arc::new(SnapshotHandler), - // dummy handler to catch all empty batches - Arc::new(EmptyBatchHandler), - ]; - let scheduler = Scheduler::new(task_store.clone(), handlers, scheduler_config)?; - - if self.schedule_snapshot { - let snapshot_period = self - .snapshot_interval - .ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?; - let snapshot_path = self - .snapshot_dir - .ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?; - - let snapshot_service = SnapshotService { - db_path: db_path.as_ref().to_path_buf(), - snapshot_period, - snapshot_path, - index_size, - meta_env_size: task_store_size, - scheduler: scheduler.clone(), - }; - - tokio::task::spawn_local(snapshot_service.run()); - } - - Ok(IndexController { - index_resolver, - scheduler, - update_file_store, - task_store, - }) - } - - /// Set the index controller builder's max update store size. - pub fn set_max_task_store_size(&mut self, max_update_store_size: usize) -> &mut Self { - let max_update_store_size = clamp_to_page_size(max_update_store_size); - self.max_task_store_size.replace(max_update_store_size); - self - } - - pub fn set_max_index_size(&mut self, size: usize) -> &mut Self { - let size = clamp_to_page_size(size); - self.max_index_size.replace(size); - self - } - - /// Set the index controller builder's snapshot path. - pub fn set_snapshot_dir(&mut self, snapshot_dir: PathBuf) -> &mut Self { - self.snapshot_dir.replace(snapshot_dir); - self - } - - /// Set the index controller builder's ignore snapshot if db exists. - pub fn set_ignore_snapshot_if_db_exists( - &mut self, - ignore_snapshot_if_db_exists: bool, - ) -> &mut Self { - self.ignore_snapshot_if_db_exists = ignore_snapshot_if_db_exists; - self - } - - /// Set the index controller builder's ignore missing snapshot. 
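`IndexControllerBuilder::build` above picks its startup path in a fixed order: import a snapshot if one was configured, otherwise load a dump if one was configured, otherwise verify the VERSION file when a non-empty database directory already exists. A reduced sketch of that precedence, where `load_snapshot`, `load_dump` and `check_version_file` are illustrative stubs, not the real loaders:

```rust
use std::path::{Path, PathBuf};

// Illustrative stand-ins for the real loaders; they only print what they would do.
fn load_snapshot(src: &Path) { println!("would import snapshot from {}", src.display()); }
fn load_dump(src: &Path) { println!("would import dump from {}", src.display()); }
fn check_version_file(db: &Path) { println!("would check {}/VERSION", db.display()); }

fn db_is_empty(db: &Path) -> bool {
    // A missing or freshly created directory counts as empty.
    db.read_dir().map(|mut d| d.next().is_none()).unwrap_or(true)
}

// Same precedence as the builder above: snapshot, else dump, else version check
// on an already populated database directory.
fn startup(db: &Path, snapshot: Option<PathBuf>, dump: Option<PathBuf>) {
    if let Some(snapshot) = snapshot {
        load_snapshot(&snapshot);
    } else if let Some(dump) = dump {
        load_dump(&dump);
    } else if db.exists() && !db_is_empty(db) {
        check_version_file(db);
    }
}

fn main() {
    startup(Path::new("./data.ms"), None, Some(PathBuf::from("./dumps/latest.dump")));
}
```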
- pub fn set_ignore_missing_snapshot(&mut self, ignore_missing_snapshot: bool) -> &mut Self { - self.ignore_missing_snapshot = ignore_missing_snapshot; - self - } - - /// Set the index controller builder's import snapshot. - pub fn set_import_snapshot(&mut self, import_snapshot: PathBuf) -> &mut Self { - self.import_snapshot.replace(import_snapshot); - self - } - - /// Set the index controller builder's snapshot interval sec. - pub fn set_snapshot_interval(&mut self, snapshot_interval: Duration) -> &mut Self { - self.snapshot_interval = Some(snapshot_interval); - self - } - - /// Set the index controller builder's schedule snapshot. - pub fn set_schedule_snapshot(&mut self) -> &mut Self { - self.schedule_snapshot = true; - self - } - - /// Set the index controller builder's dump src. - pub fn set_dump_src(&mut self, dump_src: PathBuf) -> &mut Self { - self.dump_src.replace(dump_src); - self - } - - /// Set the index controller builder's dump dst. - pub fn set_dump_dst(&mut self, dump_dst: PathBuf) -> &mut Self { - self.dump_dst.replace(dump_dst); - self - } - - /// Set the index controller builder's ignore dump if db exists. - pub fn set_ignore_dump_if_db_exists(&mut self, ignore_dump_if_db_exists: bool) -> &mut Self { - self.ignore_dump_if_db_exists = ignore_dump_if_db_exists; - self - } - - /// Set the index controller builder's ignore missing dump. - pub fn set_ignore_missing_dump(&mut self, ignore_missing_dump: bool) -> &mut Self { - self.ignore_missing_dump = ignore_missing_dump; - self - } -} - -impl IndexController -where - U: IndexMetaStore, - I: IndexStore, -{ - pub fn builder() -> IndexControllerBuilder { - IndexControllerBuilder::default() - } - - pub async fn register_update(&self, uid: String, update: Update) -> Result { - let index_uid = IndexUid::from_str(&uid).map_err(IndexResolverError::from)?; - let content = match update { - Update::DeleteDocuments(ids) => TaskContent::DocumentDeletion { - index_uid, - deletion: DocumentDeletion::Ids(ids), - }, - Update::ClearDocuments => TaskContent::DocumentDeletion { - index_uid, - deletion: DocumentDeletion::Clear, - }, - Update::Settings { - settings, - is_deletion, - allow_index_creation, - } => TaskContent::SettingsUpdate { - settings, - is_deletion, - allow_index_creation, - index_uid, - }, - Update::DocumentAddition { - mut payload, - primary_key, - format, - method, - allow_index_creation, - } => { - let mut buffer = Vec::new(); - while let Some(bytes) = payload.next().await { - let bytes = bytes?; - buffer.extend_from_slice(&bytes); - } - let (content_uuid, mut update_file) = self.update_file_store.new_update()?; - let documents_count = tokio::task::spawn_blocking(move || -> Result<_> { - // check if the payload is empty, and return an error - if buffer.is_empty() { - return Err(IndexControllerError::MissingPayload(format)); - } - - let reader = Cursor::new(buffer); - let count = match format { - DocumentAdditionFormat::Json => read_json(reader, &mut *update_file)?, - DocumentAdditionFormat::Csv => read_csv(reader, &mut *update_file)?, - DocumentAdditionFormat::Ndjson => read_ndjson(reader, &mut *update_file)?, - }; - - update_file.persist()?; - - Ok(count) - }) - .await??; - - TaskContent::DocumentAddition { - content_uuid, - merge_strategy: method, - primary_key, - documents_count, - allow_index_creation, - index_uid, - } - } - Update::DeleteIndex => TaskContent::IndexDeletion { index_uid }, - Update::CreateIndex { primary_key } => TaskContent::IndexCreation { - primary_key, - index_uid, - }, - Update::UpdateIndex { 
primary_key } => TaskContent::IndexUpdate { - primary_key, - index_uid, - }, - }; - - let task = self.task_store.register(content).await?; - self.scheduler.read().await.notify(); - - Ok(task) - } - - pub async fn register_dump_task(&self) -> Result { - let uid = dump::generate_uid(); - let content = TaskContent::Dump { uid }; - let task = self.task_store.register(content).await?; - self.scheduler.read().await.notify(); - Ok(task) - } - - pub async fn get_task(&self, id: TaskId, filter: Option) -> Result { - let task = self.scheduler.read().await.get_task(id, filter).await?; - Ok(task) - } - - pub async fn get_index_task(&self, index_uid: String, task_id: TaskId) -> Result { - let creation_task_id = self - .index_resolver - .get_index_creation_task_id(index_uid.clone()) - .await?; - if task_id < creation_task_id { - return Err(TaskError::UnexistingTask(task_id).into()); - } - - let mut filter = TaskFilter::default(); - filter.filter_index(index_uid); - let task = self - .scheduler - .read() - .await - .get_task(task_id, Some(filter)) - .await?; - - Ok(task) - } - - pub async fn list_tasks( - &self, - filter: Option, - limit: Option, - offset: Option, - ) -> Result> { - let tasks = self - .scheduler - .read() - .await - .list_tasks(offset, filter, limit) - .await?; - - Ok(tasks) - } - - pub async fn list_index_task( - &self, - index_uid: String, - limit: Option, - offset: Option, - ) -> Result> { - let task_id = self - .index_resolver - .get_index_creation_task_id(index_uid.clone()) - .await?; - - let mut filter = TaskFilter::default(); - filter.filter_index(index_uid); - - let tasks = self - .scheduler - .read() - .await - .list_tasks( - Some(offset.unwrap_or_default() + task_id), - Some(filter), - limit, - ) - .await?; - - Ok(tasks) - } - - pub async fn list_indexes(&self) -> Result> { - let indexes = self.index_resolver.list().await?; - let mut ret = Vec::new(); - for (uid, index) in indexes { - let meta = index.meta()?; - let meta = IndexMetadata { - uuid: index.uuid(), - uid, - meta, - }; - ret.push(meta); - } - - Ok(ret) - } - - pub async fn settings(&self, uid: String) -> Result> { - let index = self.index_resolver.get_index(uid).await?; - let settings = spawn_blocking(move || index.settings()).await??; - Ok(settings) - } - - /// Return the total number of documents contained in the index + the selected documents. 
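In `register_update` above, a document-addition payload is first collected into a buffer, rejected if it is empty, and then handed to `read_json`, `read_csv` or `read_ndjson` depending on the declared format before the task is registered. A reduced sketch of that dispatch; the counting reader below is a hypothetical stub, not Meilisearch's actual parsers:

```rust
use std::io::{BufRead, Cursor};

#[derive(Debug, Clone, Copy)]
enum DocumentAdditionFormat { Json, Csv, Ndjson }

// Hypothetical stand-in for the real readers: it simply counts non-empty
// newline-delimited records to show the shape of the dispatch.
fn count_records(reader: impl BufRead) -> Result<usize, String> {
    Ok(reader
        .lines()
        .filter_map(|line| line.ok())
        .filter(|line| !line.trim().is_empty())
        .count())
}

fn index_payload(buffer: Vec<u8>, format: DocumentAdditionFormat) -> Result<usize, String> {
    // An empty payload is refused before any parsing happens, as in the code above.
    if buffer.is_empty() {
        return Err(format!("A {:?} payload is missing.", format));
    }
    let reader = Cursor::new(buffer);
    match format {
        // In Meilisearch each branch calls a dedicated reader; this sketch
        // funnels every format through the same record counter.
        DocumentAdditionFormat::Json
        | DocumentAdditionFormat::Csv
        | DocumentAdditionFormat::Ndjson => count_records(reader),
    }
}

fn main() {
    let ndjson = b"{\"id\":1}\n{\"id\":2}\n".to_vec();
    println!("{:?}", index_payload(ndjson, DocumentAdditionFormat::Ndjson)); // Ok(2)
    println!("{:?}", index_payload(b"id,title\n1,Carol\n".to_vec(), DocumentAdditionFormat::Csv)); // Ok(2)
    println!("{:?}", index_payload(Vec::new(), DocumentAdditionFormat::Json)); // Err(...)
}
```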
- pub async fn documents( - &self, - uid: String, - offset: usize, - limit: usize, - attributes_to_retrieve: Option>, - ) -> Result<(u64, Vec)> { - let index = self.index_resolver.get_index(uid).await?; - let result = - spawn_blocking(move || index.retrieve_documents(offset, limit, attributes_to_retrieve)) - .await??; - Ok(result) - } - - pub async fn document( - &self, - uid: String, - doc_id: String, - attributes_to_retrieve: Option>, - ) -> Result { - let index = self.index_resolver.get_index(uid).await?; - let document = - spawn_blocking(move || index.retrieve_document(doc_id, attributes_to_retrieve)) - .await??; - Ok(document) - } - - pub async fn search(&self, uid: String, query: SearchQuery) -> Result { - let index = self.index_resolver.get_index(uid).await?; - let result = spawn_blocking(move || index.perform_search(query)).await??; - Ok(result) - } - - pub async fn get_index(&self, uid: String) -> Result { - let index = self.index_resolver.get_index(uid.clone()).await?; - let uuid = index.uuid(); - let meta = spawn_blocking(move || index.meta()).await??; - let meta = IndexMetadata { uuid, uid, meta }; - Ok(meta) - } - - pub async fn get_index_stats(&self, uid: String) -> Result { - let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?; - // Check if the currently indexing update is from our index. - let is_indexing = processing_tasks - .first() - .map_or(false, |task| task.index_uid().map_or(false, |u| u == uid)); - - let index = self.index_resolver.get_index(uid).await?; - let mut stats = spawn_blocking(move || index.stats()).await??; - stats.is_indexing = Some(is_indexing); - - Ok(stats) - } - - pub async fn get_all_stats(&self, search_rules: &SearchRules) -> Result { - let mut last_task: Option = None; - let mut indexes = BTreeMap::new(); - let mut database_size = 0; - let processing_tasks = self.scheduler.read().await.get_processing_tasks().await?; - - for (index_uid, index) in self.index_resolver.list().await? { - if !search_rules.is_index_authorized(&index_uid) { - continue; - } - - let (mut stats, meta) = - spawn_blocking::<_, Result<(IndexStats, IndexMeta)>>(move || { - Ok((index.stats()?, index.meta()?)) - }) - .await??; - - database_size += stats.size; - - last_task = last_task.map_or(Some(meta.updated_at), |last| { - Some(last.max(meta.updated_at)) - }); - - // Check if the currently indexing update is from our index. - stats.is_indexing = processing_tasks - .first() - .and_then(|p| p.index_uid().map(|u| u == index_uid)) - .or(Some(false)); - - indexes.insert(index_uid, stats); - } - - Ok(Stats { - database_size, - last_update: last_task, - indexes, - }) - } -} - -pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { - loop { - match Arc::try_unwrap(item) { - Ok(item) => return item, - Err(item_arc) => { - item = item_arc; - sleep(Duration::from_millis(100)).await; - continue; - } - } - } -} - -// Clamp the provided value to be a multiple of system page size. 
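The helper defined just below rounds a byte budget down to a whole number of memory pages (LMDB expects its map size to be a multiple of the OS page size). A self-contained illustration of the same integer arithmetic, assuming a hard-coded 4096-byte page instead of querying the `page_size` crate:

```rust
// Round `size` down to a multiple of the page size via truncating integer division.
fn clamp_to_page_size(size: usize, page: usize) -> usize {
    size / page * page
}

fn main() {
    let page = 4096; // typical page size; the real code asks page_size::get()
    assert_eq!(clamp_to_page_size(10_000, page), 8_192); // two whole pages
    assert_eq!(clamp_to_page_size(4_096, page), 4_096);  // already aligned
    assert_eq!(clamp_to_page_size(4_095, page), 0);      // less than one page
    println!("ok");
}
```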
-fn clamp_to_page_size(size: usize) -> usize { - size / page_size::get() * page_size::get() -} - -#[cfg(test)] -mod test { - use futures::future::ok; - use mockall::predicate::eq; - use nelson::Mocker; - - use crate::index::error::Result as IndexResult; - use crate::index::Index; - use crate::index::{ - DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, - }; - use crate::index_resolver::index_store::MockIndexStore; - use crate::index_resolver::meta_store::MockIndexMetaStore; - use crate::index_resolver::IndexResolver; - - use super::*; - - impl IndexController { - pub fn mock( - index_resolver: Arc>, - task_store: TaskStore, - update_file_store: UpdateFileStore, - scheduler: Arc>, - ) -> Self { - IndexController { - index_resolver, - task_store, - update_file_store, - scheduler, - } - } - } - - #[actix_rt::test] - async fn test_search_simple() { - let index_uid = "test"; - let index_uuid = Uuid::new_v4(); - let query = SearchQuery { - q: Some(String::from("hello world")), - offset: Some(10), - limit: 0, - attributes_to_retrieve: Some(vec!["string".to_owned()].into_iter().collect()), - attributes_to_crop: None, - crop_length: 18, - attributes_to_highlight: None, - show_matches_position: true, - filter: None, - sort: None, - facets: None, - highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), - highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), - crop_marker: DEFAULT_CROP_MARKER(), - matching_strategy: Default::default(), - }; - - let result = SearchResult { - hits: vec![], - estimated_total_hits: 29, - query: "hello world".to_string(), - limit: 24, - offset: 0, - processing_time_ms: 50, - facet_distribution: None, - }; - - let mut uuid_store = MockIndexMetaStore::new(); - uuid_store - .expect_get() - .with(eq(index_uid.to_owned())) - .returning(move |s| { - Box::pin(ok(( - s, - Some(crate::index_resolver::meta_store::IndexMeta { - uuid: index_uuid, - creation_task_id: 0, - }), - ))) - }); - - let mut index_store = MockIndexStore::new(); - let result_clone = result.clone(); - let query_clone = query.clone(); - index_store - .expect_get() - .with(eq(index_uuid)) - .returning(move |_uuid| { - let result = result_clone.clone(); - let query = query_clone.clone(); - let mocker = Mocker::default(); - mocker - .when::>("perform_search") - .once() - .then(move |q| { - assert_eq!(&q, &query); - Ok(result.clone()) - }); - let index = Index::mock(mocker); - Box::pin(ok(Some(index))) - }); - - let task_store_mocker = nelson::Mocker::default(); - let mocker = Mocker::default(); - let update_file_store = UpdateFileStore::mock(mocker); - let index_resolver = Arc::new(IndexResolver::new( - uuid_store, - index_store, - update_file_store.clone(), - )); - let task_store = TaskStore::mock(task_store_mocker); - let scheduler = Scheduler::new( - task_store.clone(), - vec![index_resolver.clone()], - SchedulerConfig::default(), - ) - .unwrap(); - let index_controller = - IndexController::mock(index_resolver, task_store, update_file_store, scheduler); - - let r = index_controller - .search(index_uid.to_owned(), query.clone()) - .await - .unwrap(); - assert_eq!(r, result); - } -} diff --git a/meilisearch-lib/src/index_controller/updates/error.rs b/meilisearch-lib/src/index_controller/updates/error.rs deleted file mode 100644 index 7ecaa45c5..000000000 --- a/meilisearch-lib/src/index_controller/updates/error.rs +++ /dev/null @@ -1,79 +0,0 @@ -use std::error::Error; -use std::fmt; - -use meilisearch_types::{internal_error, Code, ErrorCode}; - -use crate::{ - document_formats::DocumentFormatError, - 
index::error::IndexError, - index_controller::{update_file_store::UpdateFileStoreError, DocumentAdditionFormat}, -}; - -pub type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -#[allow(clippy::large_enum_variant)] -pub enum UpdateLoopError { - #[error("Task `{0}` not found.")] - UnexistingUpdate(u64), - #[error("An internal error has occurred. `{0}`.")] - Internal(Box), - #[error( - "update store was shut down due to a fatal error, please check your logs for more info." - )] - FatalUpdateStoreError, - #[error("{0}")] - DocumentFormatError(#[from] DocumentFormatError), - #[error("The provided payload reached the size limit.")] - PayloadTooLarge, - #[error("A {0} payload is missing.")] - MissingPayload(DocumentAdditionFormat), - #[error("{0}")] - IndexError(#[from] IndexError), -} - -impl From> for UpdateLoopError -where - T: Sync + Send + 'static + fmt::Debug, -{ - fn from(other: tokio::sync::mpsc::error::SendError) -> Self { - Self::Internal(Box::new(other)) - } -} - -impl From for UpdateLoopError { - fn from(other: tokio::sync::oneshot::error::RecvError) -> Self { - Self::Internal(Box::new(other)) - } -} - -impl From for UpdateLoopError { - fn from(other: actix_web::error::PayloadError) -> Self { - match other { - actix_web::error::PayloadError::Overflow => Self::PayloadTooLarge, - _ => Self::Internal(Box::new(other)), - } - } -} - -internal_error!( - UpdateLoopError: heed::Error, - std::io::Error, - serde_json::Error, - tokio::task::JoinError, - UpdateFileStoreError -); - -impl ErrorCode for UpdateLoopError { - fn error_code(&self) -> Code { - match self { - Self::UnexistingUpdate(_) => Code::TaskNotFound, - Self::Internal(_) => Code::Internal, - Self::FatalUpdateStoreError => Code::Internal, - Self::DocumentFormatError(error) => error.error_code(), - Self::PayloadTooLarge => Code::PayloadTooLarge, - Self::MissingPayload(_) => Code::MissingPayload, - Self::IndexError(e) => e.error_code(), - } - } -} diff --git a/meilisearch-lib/src/index_controller/versioning/error.rs b/meilisearch-lib/src/index_controller/versioning/error.rs deleted file mode 100644 index ba284ec91..000000000 --- a/meilisearch-lib/src/index_controller/versioning/error.rs +++ /dev/null @@ -1,19 +0,0 @@ -#[derive(thiserror::Error, Debug)] -pub enum VersionFileError { - #[error( - "Meilisearch (v{}) failed to infer the version of the database. Please consider using a dump to load your data.", - env!("CARGO_PKG_VERSION").to_string() - )] - MissingVersionFile, - #[error("Version file is corrupted and thus Meilisearch is unable to determine the version of the database.")] - MalformedVersionFile, - #[error( - "Expected Meilisearch engine version: {major}.{minor}.{patch}, current engine version: {}. 
To update Meilisearch use a dump.", - env!("CARGO_PKG_VERSION").to_string() - )] - VersionMismatch { - major: String, - minor: String, - patch: String, - }, -} diff --git a/meilisearch-lib/src/index_controller/versioning/mod.rs b/meilisearch-lib/src/index_controller/versioning/mod.rs deleted file mode 100644 index f2c83bdad..000000000 --- a/meilisearch-lib/src/index_controller/versioning/mod.rs +++ /dev/null @@ -1,56 +0,0 @@ -use std::fs; -use std::io::ErrorKind; -use std::path::Path; - -use self::error::VersionFileError; - -mod error; - -pub const VERSION_FILE_NAME: &str = "VERSION"; - -static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR"); -static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR"); -static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH"); - -// Persists the version of the current Meilisearch binary to a VERSION file -pub fn create_version_file(db_path: &Path) -> anyhow::Result<()> { - let version_path = db_path.join(VERSION_FILE_NAME); - fs::write( - version_path, - format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH), - )?; - - Ok(()) -} - -// Ensures Meilisearch version is compatible with the database, returns an error versions mismatch. -pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> { - let version_path = db_path.join(VERSION_FILE_NAME); - - match fs::read_to_string(&version_path) { - Ok(version) => { - let version_components = version.split('.').collect::>(); - let (major, minor, patch) = match &version_components[..] { - [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()), - _ => return Err(VersionFileError::MalformedVersionFile.into()), - }; - - if major != VERSION_MAJOR || minor != VERSION_MINOR { - return Err(VersionFileError::VersionMismatch { - major, - minor, - patch, - } - .into()); - } - } - Err(error) => { - return match error.kind() { - ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()), - _ => Err(error.into()), - } - } - } - - Ok(()) -} diff --git a/meilisearch-lib/src/index_resolver/error.rs b/meilisearch-lib/src/index_resolver/error.rs deleted file mode 100644 index d973d2229..000000000 --- a/meilisearch-lib/src/index_resolver/error.rs +++ /dev/null @@ -1,71 +0,0 @@ -use std::fmt; - -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::index_uid::IndexUidFormatError; -use meilisearch_types::internal_error; -use tokio::sync::mpsc::error::SendError as MpscSendError; -use tokio::sync::oneshot::error::RecvError as OneshotRecvError; -use uuid::Uuid; - -use crate::{error::MilliError, index::error::IndexError, update_file_store::UpdateFileStoreError}; - -pub type Result = std::result::Result; - -#[derive(thiserror::Error, Debug)] -pub enum IndexResolverError { - #[error("{0}")] - IndexError(#[from] IndexError), - #[error("Index `{0}` already exists.")] - IndexAlreadyExists(String), - #[error("Index `{0}` not found.")] - UnexistingIndex(String), - #[error("A primary key is already present. It's impossible to update it")] - ExistingPrimaryKey, - #[error("An internal error has occurred. 
`{0}`.")] - Internal(Box), - #[error("The creation of the `{0}` index has failed due to `Index uuid is already assigned`.")] - UuidAlreadyExists(Uuid), - #[error("{0}")] - Milli(#[from] milli::Error), - #[error("{0}")] - BadlyFormatted(#[from] IndexUidFormatError), -} - -impl From> for IndexResolverError -where - T: Send + Sync + 'static + fmt::Debug, -{ - fn from(other: tokio::sync::mpsc::error::SendError) -> Self { - Self::Internal(Box::new(other)) - } -} - -impl From for IndexResolverError { - fn from(other: tokio::sync::oneshot::error::RecvError) -> Self { - Self::Internal(Box::new(other)) - } -} - -internal_error!( - IndexResolverError: milli::heed::Error, - uuid::Error, - std::io::Error, - tokio::task::JoinError, - serde_json::Error, - UpdateFileStoreError -); - -impl ErrorCode for IndexResolverError { - fn error_code(&self) -> Code { - match self { - IndexResolverError::IndexError(e) => e.error_code(), - IndexResolverError::IndexAlreadyExists(_) => Code::IndexAlreadyExists, - IndexResolverError::UnexistingIndex(_) => Code::IndexNotFound, - IndexResolverError::ExistingPrimaryKey => Code::PrimaryKeyAlreadyPresent, - IndexResolverError::Internal(_) => Code::Internal, - IndexResolverError::UuidAlreadyExists(_) => Code::CreateIndex, - IndexResolverError::Milli(e) => MilliError(e).error_code(), - IndexResolverError::BadlyFormatted(_) => Code::InvalidIndexUid, - } - } -} diff --git a/meilisearch-lib/src/index_resolver/index_store.rs b/meilisearch-lib/src/index_resolver/index_store.rs deleted file mode 100644 index e4f58f130..000000000 --- a/meilisearch-lib/src/index_resolver/index_store.rs +++ /dev/null @@ -1,108 +0,0 @@ -use std::collections::HashMap; -use std::convert::TryFrom; -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use milli::update::IndexerConfig; -use tokio::fs; -use tokio::sync::RwLock; -use tokio::task::spawn_blocking; -use uuid::Uuid; - -use super::error::{IndexResolverError, Result}; -use crate::index::Index; -use crate::options::IndexerOpts; - -type AsyncMap = Arc>>; - -#[async_trait::async_trait] -#[cfg_attr(test, mockall::automock)] -pub trait IndexStore { - async fn create(&self, uuid: Uuid) -> Result; - async fn get(&self, uuid: Uuid) -> Result>; - async fn delete(&self, uuid: Uuid) -> Result>; -} - -pub struct MapIndexStore { - index_store: AsyncMap, - path: PathBuf, - index_size: usize, - indexer_config: Arc, -} - -impl MapIndexStore { - pub fn new( - path: impl AsRef, - index_size: usize, - indexer_opts: &IndexerOpts, - ) -> anyhow::Result { - let indexer_config = Arc::new(IndexerConfig::try_from(indexer_opts)?); - let path = path.as_ref().join("indexes/"); - let index_store = Arc::new(RwLock::new(HashMap::new())); - Ok(Self { - index_store, - path, - index_size, - indexer_config, - }) - } -} - -#[async_trait::async_trait] -impl IndexStore for MapIndexStore { - async fn create(&self, uuid: Uuid) -> Result { - // We need to keep the lock until we are sure the db file has been opened correclty, to - // ensure that another db is not created at the same time. 
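As the comment above notes, the `create` method that continues below holds the write lock on the in-memory index map across both the existence check and the insertion, so two concurrent `create` calls cannot open the same index twice. A reduced, synchronous sketch of that get-or-create-under-one-lock pattern, with `std::sync::RwLock` and an `Arc<String>` standing in for the tokio lock and the real `Index`:

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

type Store = Arc<RwLock<HashMap<u32, Arc<String>>>>;

// Holding the write guard across the existence check *and* the insertion is
// what prevents a second caller from creating the same entry concurrently.
fn get_or_create(store: &Store, uuid: u32) -> Arc<String> {
    let mut lock = store.write().unwrap();
    if let Some(existing) = lock.get(&uuid) {
        return existing.clone();
    }
    let created = Arc::new(format!("index-{uuid}"));
    lock.insert(uuid, created.clone());
    created
}

fn main() {
    let store: Store = Arc::new(RwLock::new(HashMap::new()));
    let a = get_or_create(&store, 7);
    let b = get_or_create(&store, 7);
    // Both calls hand back the same underlying index instance.
    assert!(Arc::ptr_eq(&a, &b));
    println!("entries: {}", store.read().unwrap().len());
}
```

Both calls return the same `Arc`, which `Arc::ptr_eq` confirms; only one entry ends up in the map.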
- let mut lock = self.index_store.write().await; - - if let Some(index) = lock.get(&uuid) { - return Ok(index.clone()); - } - let path = self.path.join(format!("{}", uuid)); - if path.exists() { - return Err(IndexResolverError::UuidAlreadyExists(uuid)); - } - - let index_size = self.index_size; - let update_handler = self.indexer_config.clone(); - let index = spawn_blocking(move || -> Result { - let index = Index::open(path, index_size, uuid, update_handler)?; - Ok(index) - }) - .await??; - - lock.insert(uuid, index.clone()); - - Ok(index) - } - - async fn get(&self, uuid: Uuid) -> Result> { - let guard = self.index_store.read().await; - match guard.get(&uuid) { - Some(index) => Ok(Some(index.clone())), - None => { - // drop the guard here so we can perform the write after without deadlocking; - drop(guard); - let path = self.path.join(format!("{}", uuid)); - if !path.exists() { - return Ok(None); - } - - let index_size = self.index_size; - let update_handler = self.indexer_config.clone(); - let index = - spawn_blocking(move || Index::open(path, index_size, uuid, update_handler)) - .await??; - self.index_store.write().await.insert(uuid, index.clone()); - Ok(Some(index)) - } - } - } - - async fn delete(&self, uuid: Uuid) -> Result> { - let db_path = self.path.join(format!("{}", uuid)); - fs::remove_dir_all(db_path).await?; - let index = self.index_store.write().await.remove(&uuid); - Ok(index) - } -} diff --git a/meilisearch-lib/src/index_resolver/meta_store.rs b/meilisearch-lib/src/index_resolver/meta_store.rs deleted file mode 100644 index f335d9923..000000000 --- a/meilisearch-lib/src/index_resolver/meta_store.rs +++ /dev/null @@ -1,223 +0,0 @@ -use std::collections::HashSet; -use std::fs::{create_dir_all, File}; -use std::io::{BufRead, BufReader, Write}; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use walkdir::WalkDir; - -use milli::heed::types::{SerdeBincode, Str}; -use milli::heed::{CompactionOption, Database, Env}; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -use super::error::{IndexResolverError, Result}; -use crate::tasks::task::TaskId; - -#[derive(Serialize, Deserialize)] -pub struct DumpEntry { - pub uid: String, - pub index_meta: IndexMeta, -} - -const UUIDS_DB_PATH: &str = "index_uuids"; - -#[async_trait::async_trait] -#[cfg_attr(test, mockall::automock)] -pub trait IndexMetaStore: Sized { - // Create a new entry for `name`. Return an error if `err` and the entry already exists, return - // the uuid otherwise. - async fn get(&self, uid: String) -> Result<(String, Option)>; - async fn delete(&self, uid: String) -> Result>; - async fn list(&self) -> Result>; - async fn insert(&self, name: String, meta: IndexMeta) -> Result<()>; - async fn snapshot(&self, path: PathBuf) -> Result>; - async fn get_size(&self) -> Result; - async fn dump(&self, path: PathBuf) -> Result<()>; -} - -#[derive(Serialize, Deserialize, Debug, Clone)] -pub struct IndexMeta { - pub uuid: Uuid, - pub creation_task_id: TaskId, -} - -#[derive(Clone)] -pub struct HeedMetaStore { - env: Arc, - db: Database>, -} - -impl Drop for HeedMetaStore { - fn drop(&mut self) { - if Arc::strong_count(&self.env) == 1 { - self.env.as_ref().clone().prepare_for_closing(); - } - } -} - -impl HeedMetaStore { - pub fn new(env: Arc) -> Result { - let db = env.create_database(Some("uuids"))?; - Ok(Self { env, db }) - } - - fn get(&self, name: &str) -> Result> { - let env = self.env.clone(); - let db = self.db; - let txn = env.read_txn()?; - match db.get(&txn, name)? 
{ - Some(meta) => Ok(Some(meta)), - None => Ok(None), - } - } - - fn delete(&self, uid: String) -> Result> { - let env = self.env.clone(); - let db = self.db; - let mut txn = env.write_txn()?; - match db.get(&txn, &uid)? { - Some(meta) => { - db.delete(&mut txn, &uid)?; - txn.commit()?; - Ok(Some(meta)) - } - None => Ok(None), - } - } - - fn list(&self) -> Result> { - let env = self.env.clone(); - let db = self.db; - let txn = env.read_txn()?; - let mut entries = Vec::new(); - for entry in db.iter(&txn)? { - let (name, meta) = entry?; - entries.push((name.to_string(), meta)) - } - Ok(entries) - } - - pub(crate) fn insert(&self, name: String, meta: IndexMeta) -> Result<()> { - let env = self.env.clone(); - let db = self.db; - let mut txn = env.write_txn()?; - - if db.get(&txn, &name)?.is_some() { - return Err(IndexResolverError::IndexAlreadyExists(name)); - } - - db.put(&mut txn, &name, &meta)?; - txn.commit()?; - Ok(()) - } - - fn snapshot(&self, mut path: PathBuf) -> Result> { - // Write transaction to acquire a lock on the database. - let txn = self.env.write_txn()?; - let mut entries = HashSet::new(); - for entry in self.db.iter(&txn)? { - let (_, IndexMeta { uuid, .. }) = entry?; - entries.insert(uuid); - } - - // only perform snapshot if there are indexes - if !entries.is_empty() { - path.push(UUIDS_DB_PATH); - create_dir_all(&path).unwrap(); - path.push("data.mdb"); - self.env.copy_to_path(path, CompactionOption::Enabled)?; - } - Ok(entries) - } - - fn get_size(&self) -> Result { - Ok(WalkDir::new(self.env.path()) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| entry.metadata().ok()) - .filter(|metadata| metadata.is_file()) - .fold(0, |acc, m| acc + m.len())) - } - - pub fn dump(&self, path: PathBuf) -> Result<()> { - let dump_path = path.join(UUIDS_DB_PATH); - create_dir_all(&dump_path)?; - let dump_file_path = dump_path.join("data.jsonl"); - let mut dump_file = File::create(&dump_file_path)?; - - let txn = self.env.read_txn()?; - for entry in self.db.iter(&txn)? { - let (uid, index_meta) = entry?; - let uid = uid.to_string(); - - let entry = DumpEntry { uid, index_meta }; - serde_json::to_writer(&mut dump_file, &entry)?; - dump_file.write_all(b"\n").unwrap(); - } - - Ok(()) - } - - pub fn load_dump(src: impl AsRef, env: Arc) -> Result<()> { - let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl"); - let indexes = File::open(&src_indexes)?; - let mut indexes = BufReader::new(indexes); - let mut line = String::new(); - - let db = Self::new(env)?; - let mut txn = db.env.write_txn()?; - - loop { - match indexes.read_line(&mut line) { - Ok(0) => break, - Ok(_) => { - let DumpEntry { uid, index_meta } = serde_json::from_str(&line)?; - db.db.put(&mut txn, &uid, &index_meta)?; - } - Err(e) => return Err(e.into()), - } - - line.clear(); - } - txn.commit()?; - - Ok(()) - } -} - -#[async_trait::async_trait] -impl IndexMetaStore for HeedMetaStore { - async fn get(&self, name: String) -> Result<(String, Option)> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.get(&name).map(|res| (name, res))).await? - } - - async fn delete(&self, uid: String) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.delete(uid)).await? - } - - async fn list(&self) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.list()).await? 
- } - - async fn insert(&self, name: String, meta: IndexMeta) -> Result<()> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.insert(name, meta)).await? - } - - async fn snapshot(&self, path: PathBuf) -> Result> { - let this = self.clone(); - tokio::task::spawn_blocking(move || this.snapshot(path)).await? - } - - async fn get_size(&self) -> Result { - self.get_size() - } - - async fn dump(&self, path: PathBuf) -> Result<()> { - let this = self.clone(); - Ok(tokio::task::spawn_blocking(move || this.dump(path)).await??) - } -} diff --git a/meilisearch-lib/src/index_resolver/mod.rs b/meilisearch-lib/src/index_resolver/mod.rs deleted file mode 100644 index 284f64942..000000000 --- a/meilisearch-lib/src/index_resolver/mod.rs +++ /dev/null @@ -1,685 +0,0 @@ -pub mod error; -pub mod index_store; -pub mod meta_store; - -use std::convert::TryFrom; -use std::path::Path; -use std::sync::Arc; - -use error::{IndexResolverError, Result}; -use index_store::{IndexStore, MapIndexStore}; -use meilisearch_types::error::ResponseError; -use meilisearch_types::index_uid::IndexUid; -use meta_store::{HeedMetaStore, IndexMetaStore}; -use milli::heed::Env; -use milli::update::{DocumentDeletionResult, IndexerConfig}; -use time::OffsetDateTime; -use tokio::task::spawn_blocking; -use uuid::Uuid; - -use crate::index::{error::Result as IndexResult, Index}; -use crate::options::IndexerOpts; -use crate::tasks::task::{DocumentDeletion, Task, TaskContent, TaskEvent, TaskId, TaskResult}; -use crate::update_file_store::UpdateFileStore; - -use self::meta_store::IndexMeta; - -pub type HardStateIndexResolver = IndexResolver; - -#[cfg(not(test))] -pub use real::IndexResolver; - -#[cfg(test)] -pub use test::MockIndexResolver as IndexResolver; - -pub fn create_index_resolver( - path: impl AsRef, - index_size: usize, - indexer_opts: &IndexerOpts, - meta_env: Arc, - file_store: UpdateFileStore, -) -> anyhow::Result { - let uuid_store = HeedMetaStore::new(meta_env)?; - let index_store = MapIndexStore::new(&path, index_size, indexer_opts)?; - Ok(IndexResolver::new(uuid_store, index_store, file_store)) -} - -mod real { - use super::*; - - pub struct IndexResolver { - pub(super) index_uuid_store: U, - pub(super) index_store: I, - pub(super) file_store: UpdateFileStore, - } - - impl IndexResolver { - pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - env: Arc, - indexer_opts: &IndexerOpts, - ) -> anyhow::Result<()> { - HeedMetaStore::load_dump(&src, env)?; - let indexes_path = src.as_ref().join("indexes"); - let indexes = indexes_path.read_dir()?; - let indexer_config = IndexerConfig::try_from(indexer_opts)?; - for index in indexes { - Index::load_dump(&index?.path(), &dst, index_db_size, &indexer_config)?; - } - - Ok(()) - } - } - - impl IndexResolver - where - U: IndexMetaStore, - I: IndexStore, - { - pub fn new(index_uuid_store: U, index_store: I, file_store: UpdateFileStore) -> Self { - Self { - index_uuid_store, - index_store, - file_store, - } - } - - pub async fn process_document_addition_batch(&self, tasks: &mut [Task]) { - fn get_content_uuid(task: &Task) -> Uuid { - match task { - Task { - content: TaskContent::DocumentAddition { content_uuid, .. }, - .. 
- } => *content_uuid, - _ => panic!("unexpected task in the document addition batch"), - } - } - - let content_uuids = tasks.iter().map(get_content_uuid).collect::>(); - - match tasks.first() { - Some(Task { - id, - content: - TaskContent::DocumentAddition { - merge_strategy, - primary_key, - allow_index_creation, - index_uid, - .. - }, - .. - }) => { - let primary_key = primary_key.clone(); - let method = *merge_strategy; - - let index = if *allow_index_creation { - self.get_or_create_index(index_uid.clone(), *id).await - } else { - self.get_index(index_uid.as_str().to_string()).await - }; - - // If the index doesn't exist and we are not allowed to create it with the first - // task, we must fails the whole batch. - let now = OffsetDateTime::now_utc(); - let index = match index { - Ok(index) => index, - Err(e) => { - let error = ResponseError::from(e); - for task in tasks.iter_mut() { - task.events.push(TaskEvent::Failed { - error: error.clone(), - timestamp: now, - }); - } - - return; - } - }; - - let file_store = self.file_store.clone(); - let result = spawn_blocking(move || { - index.update_documents( - method, - primary_key, - file_store, - content_uuids.into_iter(), - ) - }) - .await; - - match result { - Ok(Ok(results)) => { - for (task, result) in tasks.iter_mut().zip(results) { - let event = match result { - Ok(addition) => { - TaskEvent::succeeded(TaskResult::DocumentAddition { - indexed_documents: addition.indexed_documents, - }) - } - Err(error) => { - TaskEvent::failed(IndexResolverError::from(error)) - } - }; - task.events.push(event); - } - } - Ok(Err(e)) => { - let event = TaskEvent::failed(e); - for task in tasks.iter_mut() { - task.events.push(event.clone()); - } - } - Err(e) => { - let event = TaskEvent::failed(IndexResolverError::from(e)); - for task in tasks.iter_mut() { - task.events.push(event.clone()); - } - } - } - } - _ => panic!("invalid batch!"), - } - } - - pub async fn delete_content_file(&self, content_uuid: Uuid) -> Result<()> { - self.file_store.delete(content_uuid).await?; - Ok(()) - } - - async fn process_task_inner(&self, task: &Task) -> Result { - match &task.content { - TaskContent::DocumentAddition { .. } => { - panic!("updates should be handled by batch") - } - TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Ids(ids), - index_uid, - } => { - let ids = ids.clone(); - let index = self.get_index(index_uid.clone().into_inner()).await?; - - let DocumentDeletionResult { - deleted_documents, .. - } = spawn_blocking(move || index.delete_documents(&ids)).await??; - - Ok(TaskResult::DocumentDeletion { deleted_documents }) - } - TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Clear, - index_uid, - } => { - let index = self.get_index(index_uid.clone().into_inner()).await?; - let deleted_documents = spawn_blocking(move || -> IndexResult { - let number_documents = index.stats()?.number_of_documents; - index.clear_documents()?; - Ok(number_documents) - }) - .await??; - - Ok(TaskResult::ClearAll { deleted_documents }) - } - TaskContent::SettingsUpdate { - settings, - is_deletion, - allow_index_creation, - index_uid, - } => { - let index = if *is_deletion || !*allow_index_creation { - self.get_index(index_uid.clone().into_inner()).await? - } else { - self.get_or_create_index(index_uid.clone(), task.id).await? 
- }; - - let settings = settings.clone(); - spawn_blocking(move || index.update_settings(&settings.check())).await??; - - Ok(TaskResult::Other) - } - TaskContent::IndexDeletion { index_uid } => { - let index = self.delete_index(index_uid.clone().into_inner()).await?; - - let deleted_documents = spawn_blocking(move || -> IndexResult { - Ok(index.stats()?.number_of_documents) - }) - .await??; - - Ok(TaskResult::ClearAll { deleted_documents }) - } - TaskContent::IndexCreation { - primary_key, - index_uid, - } => { - let index = self.create_index(index_uid.clone(), task.id).await?; - - if let Some(primary_key) = primary_key { - let primary_key = primary_key.clone(); - spawn_blocking(move || index.update_primary_key(primary_key)).await??; - } - - Ok(TaskResult::Other) - } - TaskContent::IndexUpdate { - primary_key, - index_uid, - } => { - let index = self.get_index(index_uid.clone().into_inner()).await?; - - if let Some(primary_key) = primary_key { - let primary_key = primary_key.clone(); - spawn_blocking(move || index.update_primary_key(primary_key)).await??; - } - - Ok(TaskResult::Other) - } - _ => unreachable!("Invalid task for index resolver"), - } - } - - pub async fn process_task(&self, task: &mut Task) { - match self.process_task_inner(task).await { - Ok(res) => task.events.push(TaskEvent::succeeded(res)), - Err(e) => task.events.push(TaskEvent::failed(e)), - } - } - - pub async fn dump(&self, path: impl AsRef) -> Result<()> { - for (_, index) in self.list().await? { - index.dump(&path)?; - } - self.index_uuid_store.dump(path.as_ref().to_owned()).await?; - Ok(()) - } - - async fn create_index(&self, uid: IndexUid, creation_task_id: TaskId) -> Result { - match self.index_uuid_store.get(uid.into_inner()).await? { - (uid, Some(_)) => Err(IndexResolverError::IndexAlreadyExists(uid)), - (uid, None) => { - let uuid = Uuid::new_v4(); - let index = self.index_store.create(uuid).await?; - match self - .index_uuid_store - .insert( - uid, - IndexMeta { - uuid, - creation_task_id, - }, - ) - .await - { - Err(e) => { - match self.index_store.delete(uuid).await { - Ok(Some(index)) => { - index.close(); - } - Ok(None) => (), - Err(e) => log::error!("Error while deleting index: {:?}", e), - } - Err(e) - } - Ok(()) => Ok(index), - } - } - } - } - - /// Get or create an index with name `uid`. - pub async fn get_or_create_index(&self, uid: IndexUid, task_id: TaskId) -> Result { - match self.create_index(uid, task_id).await { - Ok(index) => Ok(index), - Err(IndexResolverError::IndexAlreadyExists(uid)) => self.get_index(uid).await, - Err(e) => Err(e), - } - } - - pub async fn list(&self) -> Result> { - let uuids = self.index_uuid_store.list().await?; - let mut indexes = Vec::new(); - for (name, IndexMeta { uuid, .. }) in uuids { - match self.index_store.get(uuid).await? { - Some(index) => indexes.push((name, index)), - None => { - // we found an unexisting index, we remove it from the uuid store - let _ = self.index_uuid_store.delete(name).await; - } - } - } - - Ok(indexes) - } - - pub async fn delete_index(&self, uid: String) -> Result { - match self.index_uuid_store.delete(uid.clone()).await? { - Some(IndexMeta { uuid, .. }) => match self.index_store.delete(uuid).await? { - Some(index) => { - index.clone().close(); - Ok(index) - } - None => Err(IndexResolverError::UnexistingIndex(uid)), - }, - None => Err(IndexResolverError::UnexistingIndex(uid)), - } - } - - pub async fn get_index(&self, uid: String) -> Result { - match self.index_uuid_store.get(uid).await? { - (name, Some(IndexMeta { uuid, .. 
})) => { - match self.index_store.get(uuid).await? { - Some(index) => Ok(index), - None => { - // For some reason we got a uuid to an unexisting index, we return an error, - // and remove the uuid from the uuid store. - let _ = self.index_uuid_store.delete(name.clone()).await; - Err(IndexResolverError::UnexistingIndex(name)) - } - } - } - (name, _) => Err(IndexResolverError::UnexistingIndex(name)), - } - } - - pub async fn get_index_creation_task_id(&self, index_uid: String) -> Result { - let (uid, meta) = self.index_uuid_store.get(index_uid).await?; - meta.map( - |IndexMeta { - creation_task_id, .. - }| creation_task_id, - ) - .ok_or(IndexResolverError::UnexistingIndex(uid)) - } - } -} - -#[cfg(test)] -mod test { - use crate::index::IndexStats; - - use super::index_store::MockIndexStore; - use super::meta_store::MockIndexMetaStore; - use super::*; - - use futures::future::ok; - use milli::FieldDistribution; - use nelson::Mocker; - - pub enum MockIndexResolver { - Real(super::real::IndexResolver), - Mock(Mocker), - } - - impl MockIndexResolver { - pub fn load_dump( - src: impl AsRef, - dst: impl AsRef, - index_db_size: usize, - env: Arc, - indexer_opts: &IndexerOpts, - ) -> anyhow::Result<()> { - super::real::IndexResolver::load_dump(src, dst, index_db_size, env, indexer_opts) - } - } - - impl MockIndexResolver - where - U: IndexMetaStore, - I: IndexStore, - { - pub fn new(index_uuid_store: U, index_store: I, file_store: UpdateFileStore) -> Self { - Self::Real(super::real::IndexResolver { - index_uuid_store, - index_store, - file_store, - }) - } - - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(mocker) - } - - pub async fn process_document_addition_batch(&self, tasks: &mut [Task]) { - match self { - IndexResolver::Real(r) => r.process_document_addition_batch(tasks).await, - IndexResolver::Mock(m) => unsafe { - m.get("process_document_addition_batch").call(tasks) - }, - } - } - - pub async fn process_task(&self, task: &mut Task) { - match self { - IndexResolver::Real(r) => r.process_task(task).await, - IndexResolver::Mock(m) => unsafe { m.get("process_task").call(task) }, - } - } - - pub async fn dump(&self, path: impl AsRef) -> Result<()> { - match self { - IndexResolver::Real(r) => r.dump(path).await, - IndexResolver::Mock(_) => todo!(), - } - } - - /// Get or create an index with name `uid`. 
- pub async fn get_or_create_index(&self, uid: IndexUid, task_id: TaskId) -> Result { - match self { - IndexResolver::Real(r) => r.get_or_create_index(uid, task_id).await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn list(&self) -> Result> { - match self { - IndexResolver::Real(r) => r.list().await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn delete_index(&self, uid: String) -> Result { - match self { - IndexResolver::Real(r) => r.delete_index(uid).await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn get_index(&self, uid: String) -> Result { - match self { - IndexResolver::Real(r) => r.get_index(uid).await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn get_index_creation_task_id(&self, index_uid: String) -> Result { - match self { - IndexResolver::Real(r) => r.get_index_creation_task_id(index_uid).await, - IndexResolver::Mock(_) => todo!(), - } - } - - pub async fn delete_content_file(&self, content_uuid: Uuid) -> Result<()> { - match self { - IndexResolver::Real(r) => r.delete_content_file(content_uuid).await, - IndexResolver::Mock(m) => unsafe { - m.get("delete_content_file").call(content_uuid) - }, - } - } - } - - #[actix_rt::test] - async fn test_remove_unknown_index() { - let mut meta_store = MockIndexMetaStore::new(); - meta_store - .expect_delete() - .once() - .returning(|_| Box::pin(ok(None))); - - let index_store = MockIndexStore::new(); - - let mocker = Mocker::default(); - let file_store = UpdateFileStore::mock(mocker); - - let index_resolver = IndexResolver::new(meta_store, index_store, file_store); - - let mut task = Task { - id: 1, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - index_resolver.process_task(&mut task).await; - - assert!(matches!(task.events[0], TaskEvent::Failed { .. })); - } - - #[actix_rt::test] - async fn test_remove_index() { - let mut meta_store = MockIndexMetaStore::new(); - meta_store.expect_delete().once().returning(|_| { - Box::pin(ok(Some(IndexMeta { - uuid: Uuid::new_v4(), - creation_task_id: 1, - }))) - }); - - let mut index_store = MockIndexStore::new(); - index_store.expect_delete().once().returning(|_| { - let mocker = Mocker::default(); - mocker.when::<(), ()>("close").then(|_| ()); - mocker - .when::<(), IndexResult>("stats") - .then(|_| { - Ok(IndexStats { - size: 10, - number_of_documents: 10, - is_indexing: None, - field_distribution: FieldDistribution::default(), - }) - }); - Box::pin(ok(Some(Index::mock(mocker)))) - }); - - let mocker = Mocker::default(); - let file_store = UpdateFileStore::mock(mocker); - - let index_resolver = IndexResolver::new(meta_store, index_store, file_store); - - let mut task = Task { - id: 1, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - index_resolver.process_task(&mut task).await; - - assert!(matches!(task.events[0], TaskEvent::Succeeded { .. 
})); - } - - #[actix_rt::test] - async fn test_delete_documents() { - let mut meta_store = MockIndexMetaStore::new(); - meta_store.expect_get().once().returning(|_| { - Box::pin(ok(( - "test".to_string(), - Some(IndexMeta { - uuid: Uuid::new_v4(), - creation_task_id: 1, - }), - ))) - }); - - let mut index_store = MockIndexStore::new(); - index_store.expect_get().once().returning(|_| { - let mocker = Mocker::default(); - mocker - .when::<(), IndexResult<()>>("clear_documents") - .once() - .then(|_| Ok(())); - mocker - .when::<(), IndexResult>("stats") - .once() - .then(|_| { - Ok(IndexStats { - size: 10, - number_of_documents: 10, - is_indexing: None, - field_distribution: FieldDistribution::default(), - }) - }); - Box::pin(ok(Some(Index::mock(mocker)))) - }); - - let mocker = Mocker::default(); - let file_store = UpdateFileStore::mock(mocker); - - let index_resolver = IndexResolver::new(meta_store, index_store, file_store); - - let mut task = Task { - id: 1, - content: TaskContent::DocumentDeletion { - deletion: DocumentDeletion::Clear, - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - index_resolver.process_task(&mut task).await; - - assert!(matches!(task.events[0], TaskEvent::Succeeded { .. })); - } - - #[actix_rt::test] - async fn test_index_update() { - let mut meta_store = MockIndexMetaStore::new(); - meta_store.expect_get().once().returning(|_| { - Box::pin(ok(( - "test".to_string(), - Some(IndexMeta { - uuid: Uuid::new_v4(), - creation_task_id: 1, - }), - ))) - }); - - let mut index_store = MockIndexStore::new(); - index_store.expect_get().once().returning(|_| { - let mocker = Mocker::default(); - - mocker - .when::>("update_primary_key") - .once() - .then(|_| { - Ok(crate::index::IndexMeta { - created_at: OffsetDateTime::now_utc(), - updated_at: OffsetDateTime::now_utc(), - primary_key: Some("key".to_string()), - }) - }); - Box::pin(ok(Some(Index::mock(mocker)))) - }); - - let mocker = Mocker::default(); - let file_store = UpdateFileStore::mock(mocker); - - let index_resolver = IndexResolver::new(meta_store, index_store, file_store); - - let mut task = Task { - id: 1, - content: TaskContent::IndexUpdate { - primary_key: Some("key".to_string()), - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - index_resolver.process_task(&mut task).await; - - assert!(matches!(task.events[0], TaskEvent::Succeeded { .. })); - } -} diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs deleted file mode 100644 index 70fd2ba51..000000000 --- a/meilisearch-lib/src/lib.rs +++ /dev/null @@ -1,37 +0,0 @@ -#[macro_use] -pub mod error; -pub mod options; - -mod analytics; -mod dump; -pub mod index; -pub mod index_controller; -mod index_resolver; -mod snapshot; -pub mod tasks; -mod update_file_store; - -use std::path::Path; - -pub use index_controller::MeiliSearch; -pub use milli; -pub use milli::heed; - -mod compression; -pub mod document_formats; - -/// Check if a db is empty. It does not provide any information on the -/// validity of the data in it. -/// We consider a database as non empty when it's a non empty directory. 
-pub fn is_empty_db(db_path: impl AsRef) -> bool { - let db_path = db_path.as_ref(); - - if !db_path.exists() { - true - // if we encounter an error or if the db is a file we consider the db non empty - } else if let Ok(dir) = db_path.read_dir() { - dir.count() == 0 - } else { - true - } -} diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs deleted file mode 100644 index ea810b9b7..000000000 --- a/meilisearch-lib/src/options.rs +++ /dev/null @@ -1,166 +0,0 @@ -use core::fmt; -use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr}; - -use byte_unit::{Byte, ByteError}; -use clap::Parser; -use milli::update::IndexerConfig; -use serde::Serialize; -use sysinfo::{RefreshKind, System, SystemExt}; - -#[derive(Debug, Clone, Parser, Serialize)] -pub struct IndexerOpts { - /// The amount of documents to skip before printing - /// a log regarding the indexing advancement. - #[serde(skip)] - #[clap(long, default_value = "100000", hide = true)] // 100k - pub log_every_n: usize, - - /// Grenad max number of chunks in bytes. - #[serde(skip)] - #[clap(long, hide = true)] - pub max_nb_chunks: Option, - - /// The maximum amount of memory the indexer will use. It defaults to 2/3 - /// of the available memory. It is recommended to use something like 80%-90% - /// of the available memory, no more. - /// - /// In case the engine is unable to retrieve the available memory the engine will - /// try to use the memory it needs but without real limit, this can lead to - /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. - #[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)] - pub max_indexing_memory: MaxMemory, - - /// The maximum number of threads the indexer will use. - /// If the number set is higher than the real number of cores available in the machine, - /// it will use the maximum number of available cores. - /// - /// It defaults to half of the available threads. - #[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)] - pub max_indexing_threads: MaxThreads, -} - -#[derive(Debug, Clone, Parser, Default, Serialize)] -pub struct SchedulerConfig { - /// The engine will disable task auto-batching, - /// and will sequencialy compute each task one by one. - #[clap(long, env = "DISABLE_AUTO_BATCHING")] - pub disable_auto_batching: bool, -} - -impl TryFrom<&IndexerOpts> for IndexerConfig { - type Error = anyhow::Error; - - fn try_from(other: &IndexerOpts) -> Result { - let thread_pool = rayon::ThreadPoolBuilder::new() - .num_threads(*other.max_indexing_threads) - .build()?; - - Ok(Self { - log_every_n: Some(other.log_every_n), - max_nb_chunks: other.max_nb_chunks, - max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize), - thread_pool: Some(thread_pool), - max_positions_per_attributes: None, - ..Default::default() - }) - } -} - -impl Default for IndexerOpts { - fn default() -> Self { - Self { - log_every_n: 100_000, - max_nb_chunks: None, - max_indexing_memory: MaxMemory::default(), - max_indexing_threads: MaxThreads::default(), - } - } -} - -/// A type used to detect the max memory available and use 2/3 of it. 
-#[derive(Debug, Clone, Copy, Serialize)] -pub struct MaxMemory(Option); - -impl FromStr for MaxMemory { - type Err = ByteError; - - fn from_str(s: &str) -> Result { - Byte::from_str(s).map(Some).map(MaxMemory) - } -} - -impl Default for MaxMemory { - fn default() -> MaxMemory { - MaxMemory( - total_memory_bytes() - .map(|bytes| bytes * 2 / 3) - .map(Byte::from_bytes), - ) - } -} - -impl fmt::Display for MaxMemory { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.0 { - Some(memory) => write!(f, "{}", memory.get_appropriate_unit(true)), - None => f.write_str("unknown"), - } - } -} - -impl Deref for MaxMemory { - type Target = Option; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl MaxMemory { - pub fn unlimited() -> Self { - Self(None) - } -} - -/// Returns the total amount of bytes available or `None` if this system isn't supported. -fn total_memory_bytes() -> Option { - if System::IS_SUPPORTED { - let memory_kind = RefreshKind::new().with_memory(); - let mut system = System::new_with_specifics(memory_kind); - system.refresh_memory(); - Some(system.total_memory() * 1024) // KiB into bytes - } else { - None - } -} - -#[derive(Debug, Clone, Copy, Serialize)] -pub struct MaxThreads(usize); - -impl FromStr for MaxThreads { - type Err = ParseIntError; - - fn from_str(s: &str) -> Result { - usize::from_str(s).map(Self) - } -} - -impl Default for MaxThreads { - fn default() -> Self { - MaxThreads(num_cpus::get() / 2) - } -} - -impl fmt::Display for MaxThreads { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl Deref for MaxThreads { - type Target = usize; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} diff --git a/meilisearch-lib/src/snapshot.rs b/meilisearch-lib/src/snapshot.rs deleted file mode 100644 index da4907939..000000000 --- a/meilisearch-lib/src/snapshot.rs +++ /dev/null @@ -1,202 +0,0 @@ -use std::fs; -use std::path::{Path, PathBuf}; -use std::sync::Arc; -use std::time::Duration; - -use anyhow::bail; -use fs_extra::dir::{self, CopyOptions}; -use log::{info, trace}; -use meilisearch_auth::open_auth_store_env; -use milli::heed::CompactionOption; -use tokio::sync::RwLock; -use tokio::time::sleep; -use walkdir::WalkDir; - -use crate::compression::from_tar_gz; -use crate::index_controller::open_meta_env; -use crate::index_controller::versioning::VERSION_FILE_NAME; -use crate::tasks::Scheduler; - -pub struct SnapshotService { - pub(crate) db_path: PathBuf, - pub(crate) snapshot_period: Duration, - pub(crate) snapshot_path: PathBuf, - pub(crate) index_size: usize, - pub(crate) meta_env_size: usize, - pub(crate) scheduler: Arc>, -} - -impl SnapshotService { - pub async fn run(self) { - info!( - "Snapshot scheduled every {}s.", - self.snapshot_period.as_secs() - ); - loop { - let snapshot_job = SnapshotJob { - dest_path: self.snapshot_path.clone(), - src_path: self.db_path.clone(), - meta_env_size: self.meta_env_size, - index_size: self.index_size, - }; - self.scheduler.write().await.schedule_snapshot(snapshot_job); - sleep(self.snapshot_period).await; - } - } -} - -pub fn load_snapshot( - db_path: impl AsRef, - snapshot_path: impl AsRef, - ignore_snapshot_if_db_exists: bool, - ignore_missing_snapshot: bool, -) -> anyhow::Result<()> { - let empty_db = crate::is_empty_db(&db_path); - let snapshot_path_exists = snapshot_path.as_ref().exists(); - - if empty_db && snapshot_path_exists { - match from_tar_gz(snapshot_path, &db_path) { - Ok(()) => Ok(()), - Err(e) => { - //clean created db folder - 
std::fs::remove_dir_all(&db_path)?; - Err(e) - } - } - } else if !empty_db && !ignore_snapshot_if_db_exists { - bail!( - "database already exists at {:?}, try to delete it or rename it", - db_path - .as_ref() - .canonicalize() - .unwrap_or_else(|_| db_path.as_ref().to_owned()) - ) - } else if !snapshot_path_exists && !ignore_missing_snapshot { - bail!("snapshot doesn't exist at {:?}", snapshot_path.as_ref()) - } else { - Ok(()) - } -} - -#[derive(Debug)] -pub struct SnapshotJob { - dest_path: PathBuf, - src_path: PathBuf, - - meta_env_size: usize, - index_size: usize, -} - -impl SnapshotJob { - pub async fn run(self) -> anyhow::Result<()> { - tokio::task::spawn_blocking(|| self.run_sync()).await??; - - Ok(()) - } - - fn run_sync(self) -> anyhow::Result<()> { - trace!("Performing snapshot."); - - let snapshot_dir = self.dest_path.clone(); - std::fs::create_dir_all(&snapshot_dir)?; - let temp_snapshot_dir = tempfile::tempdir()?; - let temp_snapshot_path = temp_snapshot_dir.path(); - - self.snapshot_version_file(temp_snapshot_path)?; - self.snapshot_meta_env(temp_snapshot_path)?; - self.snapshot_file_store(temp_snapshot_path)?; - self.snapshot_indexes(temp_snapshot_path)?; - self.snapshot_auth(temp_snapshot_path)?; - - let db_name = self - .src_path - .file_name() - .and_then(|n| n.to_str()) - .unwrap_or("data.ms") - .to_string(); - - let snapshot_path = self.dest_path.join(format!("{}.snapshot", db_name)); - let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?; - let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); - crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; - let _file = temp_snapshot_file.persist(&snapshot_path)?; - - #[cfg(unix)] - { - use std::fs::Permissions; - use std::os::unix::fs::PermissionsExt; - - let perm = Permissions::from_mode(0o644); - _file.set_permissions(perm)?; - } - - trace!("Created snapshot in {:?}.", snapshot_path); - - Ok(()) - } - - fn snapshot_version_file(&self, path: &Path) -> anyhow::Result<()> { - let dst = path.join(VERSION_FILE_NAME); - let src = self.src_path.join(VERSION_FILE_NAME); - - fs::copy(src, dst)?; - - Ok(()) - } - - fn snapshot_meta_env(&self, path: &Path) -> anyhow::Result<()> { - let env = open_meta_env(&self.src_path, self.meta_env_size)?; - - let dst = path.join("data.mdb"); - env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; - - Ok(()) - } - - fn snapshot_file_store(&self, path: &Path) -> anyhow::Result<()> { - // for now we simply copy the updates/updates_files - // FIXME(marin): We may copy more files than necessary, if new files are added while we are - // performing the snapshop. We need a way to filter them out. 
- - let dst = path.join("updates"); - fs::create_dir_all(&dst)?; - let options = CopyOptions::default(); - dir::copy(self.src_path.join("updates/updates_files"), dst, &options)?; - - Ok(()) - } - - fn snapshot_indexes(&self, path: &Path) -> anyhow::Result<()> { - let indexes_path = self.src_path.join("indexes/"); - let dst = path.join("indexes/"); - - for entry in WalkDir::new(indexes_path).max_depth(1).into_iter().skip(1) { - let entry = entry?; - let name = entry.file_name(); - let dst = dst.join(name); - - std::fs::create_dir_all(&dst)?; - - let dst = dst.join("data.mdb"); - - let mut options = milli::heed::EnvOpenOptions::new(); - options.map_size(self.index_size); - let index = milli::Index::new(options, entry.path())?; - index.copy_to_path(dst, CompactionOption::Enabled)?; - } - - Ok(()) - } - - fn snapshot_auth(&self, path: &Path) -> anyhow::Result<()> { - let auth_path = self.src_path.join("auth"); - let dst = path.join("auth"); - std::fs::create_dir_all(&dst)?; - let dst = dst.join("data.mdb"); - - let env = open_auth_store_env(&auth_path)?; - env.copy_to_path(dst, milli::heed::CompactionOption::Enabled)?; - - Ok(()) - } -} diff --git a/meilisearch-lib/src/tasks/batch.rs b/meilisearch-lib/src/tasks/batch.rs deleted file mode 100644 index 5fa2e224a..000000000 --- a/meilisearch-lib/src/tasks/batch.rs +++ /dev/null @@ -1,75 +0,0 @@ -use time::OffsetDateTime; - -use crate::snapshot::SnapshotJob; - -use super::task::{Task, TaskEvent}; - -pub type BatchId = u32; - -#[derive(Debug)] -pub enum BatchContent { - DocumentsAdditionBatch(Vec), - IndexUpdate(Task), - Dump(Task), - Snapshot(SnapshotJob), - // Symbolizes a empty batch. This can occur when we were woken, but there wasn't any work to do. - Empty, -} - -impl BatchContent { - pub fn first(&self) -> Option<&Task> { - match self { - BatchContent::DocumentsAdditionBatch(ts) => ts.first(), - BatchContent::Dump(t) | BatchContent::IndexUpdate(t) => Some(t), - BatchContent::Snapshot(_) | BatchContent::Empty => None, - } - } - - pub fn push_event(&mut self, event: TaskEvent) { - match self { - BatchContent::DocumentsAdditionBatch(ts) => { - ts.iter_mut().for_each(|t| t.events.push(event.clone())) - } - BatchContent::IndexUpdate(t) | BatchContent::Dump(t) => t.events.push(event), - BatchContent::Snapshot(_) | BatchContent::Empty => (), - } - } -} - -#[derive(Debug)] -pub struct Batch { - // Only batches that contains a persistent tasks are given an id. Snapshot batches don't have - // an id. 
- pub id: Option, - pub created_at: OffsetDateTime, - pub content: BatchContent, -} - -impl Batch { - pub fn new(id: Option, content: BatchContent) -> Self { - Self { - id, - created_at: OffsetDateTime::now_utc(), - content, - } - } - pub fn len(&self) -> usize { - match self.content { - BatchContent::DocumentsAdditionBatch(ref ts) => ts.len(), - BatchContent::IndexUpdate(_) | BatchContent::Dump(_) | BatchContent::Snapshot(_) => 1, - BatchContent::Empty => 0, - } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn empty() -> Self { - Self { - id: None, - created_at: OffsetDateTime::now_utc(), - content: BatchContent::Empty, - } - } -} diff --git a/meilisearch-lib/src/tasks/error.rs b/meilisearch-lib/src/tasks/error.rs deleted file mode 100644 index 75fd7a591..000000000 --- a/meilisearch-lib/src/tasks/error.rs +++ /dev/null @@ -1,34 +0,0 @@ -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::internal_error; -use tokio::task::JoinError; - -use crate::update_file_store::UpdateFileStoreError; - -use super::task::TaskId; - -pub type Result = std::result::Result; - -#[derive(Debug, thiserror::Error)] -pub enum TaskError { - #[error("Task `{0}` not found.")] - UnexistingTask(TaskId), - #[error("Internal error: {0}")] - Internal(Box), -} - -internal_error!( - TaskError: milli::heed::Error, - JoinError, - std::io::Error, - serde_json::Error, - UpdateFileStoreError -); - -impl ErrorCode for TaskError { - fn error_code(&self) -> Code { - match self { - TaskError::UnexistingTask(_) => Code::TaskNotFound, - TaskError::Internal(_) => Code::Internal, - } - } -} diff --git a/meilisearch-lib/src/tasks/handlers/dump_handler.rs b/meilisearch-lib/src/tasks/handlers/dump_handler.rs deleted file mode 100644 index c0833e4c7..000000000 --- a/meilisearch-lib/src/tasks/handlers/dump_handler.rs +++ /dev/null @@ -1,132 +0,0 @@ -use crate::dump::DumpHandler; -use crate::index_resolver::index_store::IndexStore; -use crate::index_resolver::meta_store::IndexMetaStore; -use crate::tasks::batch::{Batch, BatchContent}; -use crate::tasks::task::{Task, TaskContent, TaskEvent, TaskResult}; -use crate::tasks::BatchHandler; - -#[async_trait::async_trait] -impl BatchHandler for DumpHandler -where - U: IndexMetaStore + Sync + Send + 'static, - I: IndexStore + Sync + Send + 'static, -{ - fn accept(&self, batch: &Batch) -> bool { - matches!(batch.content, BatchContent::Dump { .. }) - } - - async fn process_batch(&self, mut batch: Batch) -> Batch { - match &batch.content { - BatchContent::Dump(Task { - content: TaskContent::Dump { uid }, - .. - }) => { - match self.run(uid.clone()).await { - Ok(_) => { - batch - .content - .push_event(TaskEvent::succeeded(TaskResult::Other)); - } - Err(e) => batch.content.push_event(TaskEvent::failed(e)), - } - batch - } - _ => unreachable!("invalid batch content for dump"), - } - } - - async fn finish(&self, _: &Batch) {} -} - -#[cfg(test)] -mod test { - use crate::dump::error::{DumpError, Result as DumpResult}; - use crate::index_resolver::{index_store::MockIndexStore, meta_store::MockIndexMetaStore}; - use crate::tasks::handlers::test::task_to_batch; - - use super::*; - - use nelson::Mocker; - use proptest::prelude::*; - - proptest! 
{ - #[test] - fn finish_does_nothing( - task in any::(), - ) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let batch = task_to_batch(task); - - let mocker = Mocker::default(); - let dump_handler = DumpHandler::::mock(mocker); - - dump_handler.finish(&batch).await; - }); - - rt.block_on(handle).unwrap(); - } - - #[test] - fn test_handle_dump_success( - task in any::(), - ) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let batch = task_to_batch(task); - let should_accept = matches!(batch.content, BatchContent::Dump { .. }); - - let mocker = Mocker::default(); - if should_accept { - mocker.when::>("run") - .once() - .then(|_| Ok(())); - } - - let dump_handler = DumpHandler::::mock(mocker); - - let accept = dump_handler.accept(&batch); - assert_eq!(accept, should_accept); - - if accept { - let batch = dump_handler.process_batch(batch).await; - let last_event = batch.content.first().unwrap().events.last().unwrap(); - assert!(matches!(last_event, TaskEvent::Succeeded { .. })); - } - }); - - rt.block_on(handle).unwrap(); - } - - #[test] - fn test_handle_dump_error( - task in any::(), - ) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let batch = task_to_batch(task); - let should_accept = matches!(batch.content, BatchContent::Dump { .. }); - - let mocker = Mocker::default(); - if should_accept { - mocker.when::>("run") - .once() - .then(|_| Err(DumpError::Internal("error".into()))); - } - - let dump_handler = DumpHandler::::mock(mocker); - - let accept = dump_handler.accept(&batch); - assert_eq!(accept, should_accept); - - if accept { - let batch = dump_handler.process_batch(batch).await; - let last_event = batch.content.first().unwrap().events.last().unwrap(); - assert!(matches!(last_event, TaskEvent::Failed { .. })); - } - }); - - rt.block_on(handle).unwrap(); - } - } -} diff --git a/meilisearch-lib/src/tasks/handlers/empty_handler.rs b/meilisearch-lib/src/tasks/handlers/empty_handler.rs deleted file mode 100644 index d800e1965..000000000 --- a/meilisearch-lib/src/tasks/handlers/empty_handler.rs +++ /dev/null @@ -1,18 +0,0 @@ -use crate::tasks::batch::{Batch, BatchContent}; -use crate::tasks::BatchHandler; - -/// A sink handler for empty tasks. 
-pub struct EmptyBatchHandler; - -#[async_trait::async_trait] -impl BatchHandler for EmptyBatchHandler { - fn accept(&self, batch: &Batch) -> bool { - matches!(batch.content, BatchContent::Empty) - } - - async fn process_batch(&self, batch: Batch) -> Batch { - batch - } - - async fn finish(&self, _: &Batch) {} -} diff --git a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs b/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs deleted file mode 100644 index 22c57e2fd..000000000 --- a/meilisearch-lib/src/tasks/handlers/index_resolver_handler.rs +++ /dev/null @@ -1,199 +0,0 @@ -use crate::index_resolver::IndexResolver; -use crate::index_resolver::{index_store::IndexStore, meta_store::IndexMetaStore}; -use crate::tasks::batch::{Batch, BatchContent}; -use crate::tasks::BatchHandler; - -#[async_trait::async_trait] -impl BatchHandler for IndexResolver -where - U: IndexMetaStore + Send + Sync + 'static, - I: IndexStore + Send + Sync + 'static, -{ - fn accept(&self, batch: &Batch) -> bool { - matches!( - batch.content, - BatchContent::DocumentsAdditionBatch(_) | BatchContent::IndexUpdate(_) - ) - } - - async fn process_batch(&self, mut batch: Batch) -> Batch { - match batch.content { - BatchContent::DocumentsAdditionBatch(ref mut tasks) => { - self.process_document_addition_batch(tasks).await; - } - BatchContent::IndexUpdate(ref mut task) => { - self.process_task(task).await; - } - _ => unreachable!(), - } - - batch - } - - async fn finish(&self, batch: &Batch) { - if let BatchContent::DocumentsAdditionBatch(ref tasks) = batch.content { - for task in tasks { - if let Some(content_uuid) = task.get_content_uuid() { - if let Err(e) = self.delete_content_file(content_uuid).await { - log::error!("error deleting update file: {}", e); - } - } - } - } - } -} - -#[cfg(test)] -mod test { - use crate::index_resolver::index_store::MapIndexStore; - use crate::index_resolver::meta_store::HeedMetaStore; - use crate::index_resolver::{ - error::Result as IndexResult, index_store::MockIndexStore, meta_store::MockIndexMetaStore, - }; - use crate::tasks::{ - handlers::test::task_to_batch, - task::{Task, TaskContent}, - }; - use crate::update_file_store::{Result as FileStoreResult, UpdateFileStore}; - - use super::*; - use meilisearch_types::index_uid::IndexUid; - use milli::update::IndexDocumentsMethod; - use nelson::Mocker; - use proptest::prelude::*; - use uuid::Uuid; - - proptest! 
{ - #[test] - fn test_accept_task( - task in any::(), - ) { - let batch = task_to_batch(task); - - let index_store = MockIndexStore::new(); - let meta_store = MockIndexMetaStore::new(); - let mocker = Mocker::default(); - let update_file_store = UpdateFileStore::mock(mocker); - let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store); - - match batch.content { - BatchContent::DocumentsAdditionBatch(_) - | BatchContent::IndexUpdate(_) => assert!(index_resolver.accept(&batch)), - BatchContent::Dump(_) - | BatchContent::Snapshot(_) - | BatchContent::Empty => assert!(!index_resolver.accept(&batch)), - } - } - } - - #[actix_rt::test] - async fn finisher_called_on_document_update() { - let index_store = MockIndexStore::new(); - let meta_store = MockIndexMetaStore::new(); - let mocker = Mocker::default(); - let content_uuid = Uuid::new_v4(); - mocker - .when::>("delete") - .once() - .then(move |uuid| { - assert_eq!(uuid, content_uuid); - Ok(()) - }); - let update_file_store = UpdateFileStore::mock(mocker); - let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store); - - let task = Task { - id: 1, - content: TaskContent::DocumentAddition { - content_uuid, - merge_strategy: IndexDocumentsMethod::ReplaceDocuments, - primary_key: None, - documents_count: 100, - allow_index_creation: true, - index_uid: IndexUid::new_unchecked("test"), - }, - events: Vec::new(), - }; - - let batch = task_to_batch(task); - - index_resolver.finish(&batch).await; - } - - #[actix_rt::test] - #[should_panic] - async fn panic_when_passed_unsupported_batch() { - let index_store = MockIndexStore::new(); - let meta_store = MockIndexMetaStore::new(); - let mocker = Mocker::default(); - let update_file_store = UpdateFileStore::mock(mocker); - let index_resolver = IndexResolver::new(meta_store, index_store, update_file_store); - - let task = Task { - id: 1, - content: TaskContent::Dump { - uid: String::from("hello"), - }, - events: Vec::new(), - }; - - let batch = task_to_batch(task); - - index_resolver.process_batch(batch).await; - } - - proptest! { - #[test] - fn index_document_task_deletes_update_file( - task in any::(), - ) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let mocker = Mocker::default(); - - if let TaskContent::DocumentAddition{ .. } = task.content { - mocker.when::>("delete_content_file").then(|_| Ok(())); - } - - let index_resolver: IndexResolver = IndexResolver::mock(mocker); - - let batch = task_to_batch(task); - - index_resolver.finish(&batch).await; - }); - - rt.block_on(handle).unwrap(); - } - - #[test] - fn test_handle_batch(task in any::()) { - let rt = tokio::runtime::Runtime::new().unwrap(); - let handle = rt.spawn(async { - let mocker = Mocker::default(); - match task.content { - TaskContent::DocumentAddition { .. } => { - mocker.when::<&mut [Task], ()>("process_document_addition_batch").then(|_| ()); - } - TaskContent::Dump { .. 
} => (), - _ => { - mocker.when::<&mut Task, ()>("process_task").then(|_| ()); - } - } - let index_resolver: IndexResolver = IndexResolver::mock(mocker); - - - let batch = task_to_batch(task); - - if index_resolver.accept(&batch) { - index_resolver.process_batch(batch).await; - } - }); - - if let Err(e) = rt.block_on(handle) { - if e.is_panic() { - std::panic::resume_unwind(e.into_panic()); - } - } - } - } -} diff --git a/meilisearch-lib/src/tasks/handlers/mod.rs b/meilisearch-lib/src/tasks/handlers/mod.rs deleted file mode 100644 index 8f02de8b9..000000000 --- a/meilisearch-lib/src/tasks/handlers/mod.rs +++ /dev/null @@ -1,34 +0,0 @@ -pub mod dump_handler; -pub mod empty_handler; -mod index_resolver_handler; -pub mod snapshot_handler; - -#[cfg(test)] -mod test { - use time::OffsetDateTime; - - use crate::tasks::{ - batch::{Batch, BatchContent}, - task::{Task, TaskContent}, - }; - - pub fn task_to_batch(task: Task) -> Batch { - let content = match task.content { - TaskContent::DocumentAddition { .. } => { - BatchContent::DocumentsAdditionBatch(vec![task]) - } - TaskContent::DocumentDeletion { .. } - | TaskContent::SettingsUpdate { .. } - | TaskContent::IndexDeletion { .. } - | TaskContent::IndexCreation { .. } - | TaskContent::IndexUpdate { .. } => BatchContent::IndexUpdate(task), - TaskContent::Dump { .. } => BatchContent::Dump(task), - }; - - Batch { - id: Some(1), - created_at: OffsetDateTime::now_utc(), - content, - } - } -} diff --git a/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs b/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs deleted file mode 100644 index 32fe6d746..000000000 --- a/meilisearch-lib/src/tasks/handlers/snapshot_handler.rs +++ /dev/null @@ -1,26 +0,0 @@ -use crate::tasks::batch::{Batch, BatchContent}; -use crate::tasks::BatchHandler; - -pub struct SnapshotHandler; - -#[async_trait::async_trait] -impl BatchHandler for SnapshotHandler { - fn accept(&self, batch: &Batch) -> bool { - matches!(batch.content, BatchContent::Snapshot(_)) - } - - async fn process_batch(&self, batch: Batch) -> Batch { - match batch.content { - BatchContent::Snapshot(job) => { - if let Err(e) = job.run().await { - log::error!("snapshot error: {e}"); - } - } - _ => unreachable!(), - } - - Batch::empty() - } - - async fn finish(&self, _: &Batch) {} -} diff --git a/meilisearch-lib/src/tasks/mod.rs b/meilisearch-lib/src/tasks/mod.rs deleted file mode 100644 index fe722a987..000000000 --- a/meilisearch-lib/src/tasks/mod.rs +++ /dev/null @@ -1,56 +0,0 @@ -use async_trait::async_trait; - -pub use handlers::empty_handler::EmptyBatchHandler; -pub use handlers::snapshot_handler::SnapshotHandler; -pub use scheduler::Scheduler; -pub use task_store::TaskFilter; - -#[cfg(test)] -pub use task_store::test::MockTaskStore as TaskStore; -#[cfg(not(test))] -pub use task_store::TaskStore; - -use batch::Batch; -use error::Result; - -pub mod batch; -pub mod error; -mod handlers; -mod scheduler; -pub mod task; -mod task_store; -pub mod update_loop; - -#[cfg_attr(test, mockall::automock(type Error=test::DebugError;))] -#[async_trait] -pub trait BatchHandler: Sync + Send + 'static { - /// return whether this handler can accept this batch - fn accept(&self, batch: &Batch) -> bool; - - /// Processes the `Task` batch returning the batch with the `Task` updated. - /// - /// It is ok for this function to panic if a batch is handed that hasn't been verified by - /// `accept` beforehand. 
- async fn process_batch(&self, batch: Batch) -> Batch; - - /// `finish` is called when the result of `process` has been committed to the task store. This - /// method can be used to perform cleanup after the update has been completed for example. - async fn finish(&self, batch: &Batch); -} - -#[cfg(test)] -mod test { - use serde::{Deserialize, Serialize}; - use std::fmt::Display; - - #[derive(Debug, Serialize, Deserialize)] - pub struct DebugError; - - impl Display for DebugError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("an error") - } - } - - impl std::error::Error for DebugError {} -} diff --git a/meilisearch-lib/src/tasks/task.rs b/meilisearch-lib/src/tasks/task.rs deleted file mode 100644 index 7f9b72964..000000000 --- a/meilisearch-lib/src/tasks/task.rs +++ /dev/null @@ -1,195 +0,0 @@ -use meilisearch_types::error::ResponseError; -use meilisearch_types::index_uid::IndexUid; -use milli::update::{DocumentAdditionResult, IndexDocumentsMethod}; -use serde::{Deserialize, Serialize}; -use time::OffsetDateTime; -use uuid::Uuid; - -use super::batch::BatchId; -use crate::index::{Settings, Unchecked}; - -pub type TaskId = u32; - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -pub enum TaskResult { - DocumentAddition { indexed_documents: u64 }, - DocumentDeletion { deleted_documents: u64 }, - ClearAll { deleted_documents: u64 }, - Other, -} - -impl From for TaskResult { - fn from(other: DocumentAdditionResult) -> Self { - Self::DocumentAddition { - indexed_documents: other.indexed_documents, - } - } -} - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -pub enum TaskEvent { - Created( - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - OffsetDateTime, - ), - Batched { - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - batch_id: BatchId, - }, - Processing( - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - OffsetDateTime, - ), - Succeeded { - result: TaskResult, - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - }, - Failed { - error: ResponseError, - #[cfg_attr(test, proptest(strategy = "test::datetime_strategy()"))] - #[serde(with = "time::serde::rfc3339")] - timestamp: OffsetDateTime, - }, -} - -impl TaskEvent { - pub fn succeeded(result: TaskResult) -> Self { - Self::Succeeded { - result, - timestamp: OffsetDateTime::now_utc(), - } - } - - pub fn failed(error: impl Into) -> Self { - Self::Failed { - error: error.into(), - timestamp: OffsetDateTime::now_utc(), - } - } -} - -/// A task represents an operation that Meilisearch must do. -/// It's stored on disk and executed from the lowest to highest Task id. -/// Every time a new task is created it has a higher Task id than the previous one. -/// See also `Job`. -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -pub struct Task { - pub id: TaskId, - /// The name of the index the task is targeting. If it isn't targeting any index (i.e Dump task) - /// then this is None - // TODO: when next forward breaking dumps, it would be a good idea to move this field inside of - // the TaskContent. 
- pub content: TaskContent, - pub events: Vec, -} - -impl Task { - /// Return true when a task is finished. - /// A task is finished when its last state is either `Succeeded` or `Failed`. - pub fn is_finished(&self) -> bool { - self.events.last().map_or(false, |event| { - matches!( - event, - TaskEvent::Succeeded { .. } | TaskEvent::Failed { .. } - ) - }) - } - - /// Return the content_uuid of the `Task` if there is one. - pub fn get_content_uuid(&self) -> Option { - match self { - Task { - content: TaskContent::DocumentAddition { content_uuid, .. }, - .. - } => Some(*content_uuid), - _ => None, - } - } - - pub fn index_uid(&self) -> Option<&str> { - match &self.content { - TaskContent::DocumentAddition { index_uid, .. } - | TaskContent::DocumentDeletion { index_uid, .. } - | TaskContent::SettingsUpdate { index_uid, .. } - | TaskContent::IndexDeletion { index_uid } - | TaskContent::IndexCreation { index_uid, .. } - | TaskContent::IndexUpdate { index_uid, .. } => Some(index_uid.as_str()), - TaskContent::Dump { .. } => None, - } - } -} - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -pub enum DocumentDeletion { - Clear, - Ids(Vec), -} - -#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] -#[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[allow(clippy::large_enum_variant)] -pub enum TaskContent { - DocumentAddition { - index_uid: IndexUid, - #[cfg_attr(test, proptest(value = "Uuid::new_v4()"))] - content_uuid: Uuid, - #[cfg_attr(test, proptest(strategy = "test::index_document_method_strategy()"))] - merge_strategy: IndexDocumentsMethod, - primary_key: Option, - documents_count: usize, - allow_index_creation: bool, - }, - DocumentDeletion { - index_uid: IndexUid, - deletion: DocumentDeletion, - }, - SettingsUpdate { - index_uid: IndexUid, - settings: Settings, - /// Indicates whether the task was a deletion - is_deletion: bool, - allow_index_creation: bool, - }, - IndexDeletion { - index_uid: IndexUid, - }, - IndexCreation { - index_uid: IndexUid, - primary_key: Option, - }, - IndexUpdate { - index_uid: IndexUid, - primary_key: Option, - }, - Dump { - uid: String, - }, -} - -#[cfg(test)] -mod test { - use proptest::prelude::*; - - use super::*; - - pub(super) fn index_document_method_strategy() -> impl Strategy { - prop_oneof![ - Just(IndexDocumentsMethod::ReplaceDocuments), - Just(IndexDocumentsMethod::UpdateDocuments), - ] - } - - pub(super) fn datetime_strategy() -> impl Strategy { - Just(OffsetDateTime::now_utc()) - } -} diff --git a/meilisearch-lib/src/tasks/task_store/store.rs b/meilisearch-lib/src/tasks/task_store/store.rs deleted file mode 100644 index 24d0d3a65..000000000 --- a/meilisearch-lib/src/tasks/task_store/store.rs +++ /dev/null @@ -1,377 +0,0 @@ -#[allow(clippy::upper_case_acronyms)] - -type BEU32 = milli::heed::zerocopy::U32; - -const INDEX_UIDS_TASK_IDS: &str = "index-uids-task-ids"; -const TASKS: &str = "tasks"; - -use std::collections::HashSet; -use std::ops::Bound::{Excluded, Unbounded}; -use std::result::Result as StdResult; -use std::sync::Arc; - -use milli::heed::types::{OwnedType, SerdeJson, Str}; -use milli::heed::{Database, Env, RoTxn, RwTxn}; -use milli::heed_codec::RoaringBitmapCodec; -use roaring::RoaringBitmap; - -use crate::tasks::task::{Task, TaskId}; - -use super::super::Result; -use super::TaskFilter; - -pub struct Store { - env: Arc, - /// Maps an index uid to the set of tasks ids associated to it. 
- index_uid_task_ids: Database, - tasks: Database, SerdeJson>, -} - -impl Drop for Store { - fn drop(&mut self) { - if Arc::strong_count(&self.env) == 1 { - self.env.as_ref().clone().prepare_for_closing(); - } - } -} - -impl Store { - /// Create a new store from the specified `Path`. - /// Be really cautious when calling this function, the returned `Store` may - /// be in an invalid state, with dangling processing tasks. - /// You want to patch all un-finished tasks and put them in your pending - /// queue with the `reset_and_return_unfinished_update` method. - pub fn new(env: Arc) -> Result { - let index_uid_task_ids = env.create_database(Some(INDEX_UIDS_TASK_IDS))?; - let tasks = env.create_database(Some(TASKS))?; - - Ok(Self { - env, - index_uid_task_ids, - tasks, - }) - } - - pub fn wtxn(&self) -> Result { - Ok(self.env.write_txn()?) - } - - pub fn rtxn(&self) -> Result { - Ok(self.env.read_txn()?) - } - - /// Returns the id for the next task. - /// - /// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit - /// the task to the store in the same transaction, no one else will hav this task id. - pub fn next_task_id(&self, txn: &mut RwTxn) -> Result { - let id = self - .tasks - .lazily_decode_data() - .last(txn)? - .map(|(id, _)| id.get() + 1) - .unwrap_or(0); - Ok(id) - } - - pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> { - self.tasks.put(txn, &BEU32::new(task.id), task)?; - // only add the task to the indexes index if it has an index_uid - if let Some(index_uid) = task.index_uid() { - let mut tasks_set = self - .index_uid_task_ids - .get(txn, index_uid)? - .unwrap_or_default(); - - tasks_set.insert(task.id); - - self.index_uid_task_ids.put(txn, index_uid, &tasks_set)?; - } - - Ok(()) - } - - pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result> { - let task = self.tasks.get(txn, &BEU32::new(id))?; - Ok(task) - } - - /// Returns the unfinished tasks starting from the given taskId in ascending order. - pub fn fetch_unfinished_tasks(&self, txn: &RoTxn, from: Option) -> Result> { - // We must NEVER re-enqueue an already processed task! It's content uuid would point to an unexisting file. - // - // TODO(marin): This may create some latency when the first batch lazy loads the pending updates. - let from = from.unwrap_or_default(); - - let result: StdResult, milli::heed::Error> = self - .tasks - .range(txn, &(BEU32::new(from)..))? - .map(|r| r.map(|(_, t)| t)) - .filter(|result| result.as_ref().map_or(true, |t| !t.is_finished())) - .collect(); - - result.map_err(Into::into) - } - - /// Returns all the tasks starting from the given taskId and going in descending order. - pub fn list_tasks( - &self, - txn: &RoTxn, - from: Option, - filter: Option, - limit: Option, - ) -> Result> { - let from = match from { - Some(from) => from, - None => self.tasks.last(txn)?.map_or(0, |(id, _)| id.get()), - }; - - let filter_fn = |task: &Task| { - filter - .as_ref() - .and_then(|f| f.filter_fn.as_ref()) - .map_or(true, |f| f(task)) - }; - - let result: Result> = match filter.as_ref().and_then(|f| f.filtered_indexes()) { - Some(indexes) => self - .compute_candidates(txn, indexes, from)? - .filter(|result| result.as_ref().map_or(true, filter_fn)) - .take(limit.unwrap_or(usize::MAX)) - .collect(), - None => self - .tasks - .rev_range(txn, &(..=BEU32::new(from)))? 
- .map(|r| r.map(|(_, t)| t).map_err(Into::into)) - .filter(|result| result.as_ref().map_or(true, filter_fn)) - .take(limit.unwrap_or(usize::MAX)) - .collect(), - }; - - result.map_err(Into::into) - } - - fn compute_candidates<'a>( - &'a self, - txn: &'a RoTxn, - indexes: &HashSet, - from: TaskId, - ) -> Result> + 'a> { - let mut candidates = RoaringBitmap::new(); - - for index_uid in indexes { - if let Some(tasks_set) = self.index_uid_task_ids.get(txn, index_uid)? { - candidates |= tasks_set; - } - } - - candidates.remove_range((Excluded(from), Unbounded)); - - let iter = candidates - .into_iter() - .rev() - .filter_map(|id| self.get(txn, id).transpose()); - - Ok(iter) - } -} - -#[cfg(test)] -pub mod test { - use itertools::Itertools; - use meilisearch_types::index_uid::IndexUid; - use milli::heed::EnvOpenOptions; - use nelson::Mocker; - use tempfile::TempDir; - - use crate::tasks::task::TaskContent; - - use super::*; - - /// TODO: use this mock to test the task store properly. - #[allow(dead_code)] - pub enum MockStore { - Real(Store), - Fake(Mocker), - } - - pub struct TmpEnv(TempDir, Arc); - - impl TmpEnv { - pub fn env(&self) -> Arc { - self.1.clone() - } - } - - pub fn tmp_env() -> TmpEnv { - let tmp = tempfile::tempdir().unwrap(); - - let mut options = EnvOpenOptions::new(); - options.map_size(4096 * 100000); - options.max_dbs(1000); - let env = Arc::new(options.open(tmp.path()).unwrap()); - - TmpEnv(tmp, env) - } - - impl MockStore { - pub fn new(env: Arc) -> Result { - Ok(Self::Real(Store::new(env)?)) - } - - pub fn wtxn(&self) -> Result { - match self { - MockStore::Real(index) => index.wtxn(), - MockStore::Fake(_) => todo!(), - } - } - - pub fn rtxn(&self) -> Result { - match self { - MockStore::Real(index) => index.rtxn(), - MockStore::Fake(_) => todo!(), - } - } - - pub fn next_task_id(&self, txn: &mut RwTxn) -> Result { - match self { - MockStore::Real(index) => index.next_task_id(txn), - MockStore::Fake(_) => todo!(), - } - } - - pub fn put(&self, txn: &mut RwTxn, task: &Task) -> Result<()> { - match self { - MockStore::Real(index) => index.put(txn, task), - MockStore::Fake(_) => todo!(), - } - } - - pub fn get(&self, txn: &RoTxn, id: TaskId) -> Result> { - match self { - MockStore::Real(index) => index.get(txn, id), - MockStore::Fake(_) => todo!(), - } - } - - pub fn fetch_unfinished_tasks( - &self, - txn: &RoTxn, - from: Option, - ) -> Result> { - match self { - MockStore::Real(index) => index.fetch_unfinished_tasks(txn, from), - MockStore::Fake(_) => todo!(), - } - } - - pub fn list_tasks( - &self, - txn: &RoTxn, - from: Option, - filter: Option, - limit: Option, - ) -> Result> { - match self { - MockStore::Real(index) => index.list_tasks(txn, from, filter, limit), - MockStore::Fake(_) => todo!(), - } - } - } - - #[test] - fn test_ordered_filtered_updates() { - let tmp = tmp_env(); - let store = Store::new(tmp.env()).unwrap(); - - let tasks = (0..100) - .map(|_| Task { - id: rand::random(), - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: vec![], - }) - .collect::>(); - - let mut txn = store.env.write_txn().unwrap(); - tasks - .iter() - .try_for_each(|t| store.put(&mut txn, t)) - .unwrap(); - - let mut filter = TaskFilter::default(); - filter.filter_index("test".into()); - - let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap(); - - assert!(tasks - .iter() - .map(|t| t.id) - .tuple_windows() - .all(|(a, b)| a > b)); - } - - #[test] - fn test_filter_same_index_prefix() { - let tmp = tmp_env(); - let store 
= Store::new(tmp.env()).unwrap(); - - let task_1 = Task { - id: 1, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: vec![], - }; - - let task_2 = Task { - id: 0, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test1"), - }, - events: vec![], - }; - - let mut txn = store.wtxn().unwrap(); - store.put(&mut txn, &task_1).unwrap(); - store.put(&mut txn, &task_2).unwrap(); - - let mut filter = TaskFilter::default(); - filter.filter_index("test".into()); - - let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap(); - - txn.abort().unwrap(); - assert_eq!(tasks.len(), 1); - assert_eq!(tasks.first().as_ref().unwrap().index_uid().unwrap(), "test"); - - // same thing but invert the ids - let task_1 = Task { - id: 0, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test"), - }, - events: vec![], - }; - let task_2 = Task { - id: 1, - content: TaskContent::IndexDeletion { - index_uid: IndexUid::new_unchecked("test1"), - }, - events: vec![], - }; - - let mut txn = store.wtxn().unwrap(); - store.put(&mut txn, &task_1).unwrap(); - store.put(&mut txn, &task_2).unwrap(); - - let mut filter = TaskFilter::default(); - filter.filter_index("test".into()); - - let tasks = store.list_tasks(&txn, None, Some(filter), None).unwrap(); - - assert_eq!(tasks.len(), 1); - assert_eq!(tasks.first().as_ref().unwrap().index_uid().unwrap(), "test"); - } -} diff --git a/meilisearch-lib/src/tasks/update_loop.rs b/meilisearch-lib/src/tasks/update_loop.rs deleted file mode 100644 index b6e43e319..000000000 --- a/meilisearch-lib/src/tasks/update_loop.rs +++ /dev/null @@ -1,93 +0,0 @@ -use std::sync::Arc; - -use time::OffsetDateTime; -use tokio::sync::{watch, RwLock}; - -use super::batch::Batch; -use super::error::Result; -use super::{BatchHandler, Scheduler}; -use crate::tasks::task::TaskEvent; - -/// The update loop sequentially performs batches of updates by asking the scheduler for a batch, -/// and handing it to the `TaskPerformer`. -pub struct UpdateLoop { - scheduler: Arc>, - performers: Vec>, - - notifier: Option>, -} - -impl UpdateLoop { - pub fn new( - scheduler: Arc>, - performers: Vec>, - notifier: watch::Receiver<()>, - ) -> Self { - Self { - scheduler, - performers, - notifier: Some(notifier), - } - } - - pub async fn run(mut self) { - let mut notifier = self.notifier.take().unwrap(); - - loop { - if notifier.changed().await.is_err() { - break; - } - - if let Err(e) = self.process_next_batch().await { - log::error!("an error occurred while processing an update batch: {}", e); - } - } - } - - async fn process_next_batch(&self) -> Result<()> { - let mut batch = { self.scheduler.write().await.prepare().await? }; - let performer = self - .performers - .iter() - .find(|p| p.accept(&batch)) - .expect("No performer found for batch") - .clone(); - - batch - .content - .push_event(TaskEvent::Processing(OffsetDateTime::now_utc())); - - batch.content = { - self.scheduler - .read() - .await - .update_tasks(batch.content) - .await? - }; - - let batch = performer.process_batch(batch).await; - - self.handle_batch_result(batch, performer).await?; - - Ok(()) - } - - /// Handles the result from a processed batch. - /// - /// When a task is processed, the result of the process is pushed to its event list. The - /// `handle_batch_result` make sure that the new state is saved to the store. - /// The tasks are then removed from the processing queue. 
- async fn handle_batch_result( - &self, - mut batch: Batch, - performer: Arc, - ) -> Result<()> { - let mut scheduler = self.scheduler.write().await; - let content = scheduler.update_tasks(batch.content).await?; - scheduler.finish(); - drop(scheduler); - batch.content = content; - performer.finish(&batch).await; - Ok(()) - } -} diff --git a/meilisearch-lib/src/update_file_store.rs b/meilisearch-lib/src/update_file_store.rs deleted file mode 100644 index cb4eadf4d..000000000 --- a/meilisearch-lib/src/update_file_store.rs +++ /dev/null @@ -1,258 +0,0 @@ -use std::fs::{create_dir_all, File}; -use std::io::{self, BufReader, BufWriter, Write}; -use std::ops::{Deref, DerefMut}; -use std::path::{Path, PathBuf}; - -use milli::documents::DocumentsBatchReader; -use serde_json::Map; -use tempfile::{NamedTempFile, PersistError}; -use uuid::Uuid; - -#[cfg(not(test))] -pub use store::UpdateFileStore; -#[cfg(test)] -pub use test::MockUpdateFileStore as UpdateFileStore; - -const UPDATE_FILES_PATH: &str = "updates/updates_files"; - -use crate::document_formats::read_ndjson; - -pub struct UpdateFile { - path: PathBuf, - file: NamedTempFile, -} - -#[derive(Debug, thiserror::Error)] -#[error("Error while persisting update to disk: {0}")] -pub struct UpdateFileStoreError(Box); - -pub type Result = std::result::Result; - -macro_rules! into_update_store_error { - ($($other:path),*) => { - $( - impl From<$other> for UpdateFileStoreError { - fn from(other: $other) -> Self { - Self(Box::new(other)) - } - } - )* - }; -} - -into_update_store_error!( - PersistError, - io::Error, - serde_json::Error, - milli::documents::Error, - milli::documents::DocumentsBatchCursorError -); - -impl UpdateFile { - pub fn persist(self) -> Result<()> { - self.file.persist(&self.path)?; - Ok(()) - } -} - -impl Deref for UpdateFile { - type Target = NamedTempFile; - - fn deref(&self) -> &Self::Target { - &self.file - } -} - -impl DerefMut for UpdateFile { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.file - } -} - -mod store { - use super::*; - - #[derive(Clone, Debug)] - pub struct UpdateFileStore { - path: PathBuf, - } - - impl UpdateFileStore { - pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH); - let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH); - - // No update files to load - if !src_update_files_path.exists() { - return Ok(()); - } - - create_dir_all(&dst_update_files_path)?; - - let entries = std::fs::read_dir(src_update_files_path)?; - - for entry in entries { - let entry = entry?; - let update_file = BufReader::new(File::open(entry.path())?); - let file_uuid = entry.file_name(); - let file_uuid = file_uuid - .to_str() - .ok_or_else(|| anyhow::anyhow!("invalid update file name"))?; - let dst_path = dst_update_files_path.join(file_uuid); - let dst_file = BufWriter::new(File::create(dst_path)?); - read_ndjson(update_file, dst_file)?; - } - - Ok(()) - } - - pub fn new(path: impl AsRef) -> Result { - let path = path.as_ref().join(UPDATE_FILES_PATH); - std::fs::create_dir_all(&path)?; - Ok(Self { path }) - } - - /// Creates a new temporary update file. - /// A call to `persist` is needed to persist the file in the database. 
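// A minimal usage sketch of the create-then-persist flow documented just above for the
// (now removed) `UpdateFileStore`: `new_update` only hands back a temporary file, and
// nothing is kept under the uuid until `persist` is called. The helper name and payload
// are illustrative; `Result` is the module's own alias.
use std::io::Write;

fn write_update(store: &UpdateFileStore, payload: &[u8]) -> Result<Uuid> {
    let (uuid, mut update_file) = store.new_update()?;
    // Writes reach the backing `NamedTempFile` through `DerefMut`.
    update_file.write_all(payload)?;
    // Moves the temporary file to its final location under the update files directory.
    update_file.persist()?;
    Ok(uuid)
}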
- pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { - let file = NamedTempFile::new_in(&self.path)?; - let uuid = Uuid::new_v4(); - let path = self.path.join(uuid.to_string()); - let update_file = UpdateFile { file, path }; - - Ok((uuid, update_file)) - } - - /// Returns the file corresponding to the requested uuid. - pub fn get_update(&self, uuid: Uuid) -> Result { - let path = self.path.join(uuid.to_string()); - let file = File::open(path)?; - Ok(file) - } - - /// Copies the content of the update file pointed to by `uuid` to the `dst` directory. - pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef) -> Result<()> { - let src = self.path.join(uuid.to_string()); - let mut dst = dst.as_ref().join(UPDATE_FILES_PATH); - std::fs::create_dir_all(&dst)?; - dst.push(uuid.to_string()); - std::fs::copy(src, dst)?; - Ok(()) - } - - /// Peforms a dump of the given update file uuid into the provided dump path. - pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef) -> Result<()> { - let uuid_string = uuid.to_string(); - let update_file_path = self.path.join(&uuid_string); - let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH); - std::fs::create_dir_all(&dst)?; - dst.push(&uuid_string); - - let update_file = File::open(update_file_path)?; - let mut dst_file = NamedTempFile::new_in(&dump_path)?; - let (mut document_cursor, index) = - DocumentsBatchReader::from_reader(update_file)?.into_cursor_and_fields_index(); - - let mut document_buffer = Map::new(); - // TODO: we need to find a way to do this more efficiently. (create a custom serializer - // for jsonl for example...) - while let Some(document) = document_cursor.next_document()? { - for (field_id, content) in document.iter() { - if let Some(field_name) = index.name(field_id) { - let content = serde_json::from_slice(content)?; - document_buffer.insert(field_name.to_string(), content); - } - } - - serde_json::to_writer(&mut dst_file, &document_buffer)?; - dst_file.write_all(b"\n")?; - document_buffer.clear(); - } - - dst_file.persist(dst)?; - - Ok(()) - } - - pub fn get_size(&self, uuid: Uuid) -> Result { - Ok(self.get_update(uuid)?.metadata()?.len()) - } - - pub async fn delete(&self, uuid: Uuid) -> Result<()> { - let path = self.path.join(uuid.to_string()); - tokio::fs::remove_file(path).await?; - Ok(()) - } - } -} - -#[cfg(test)] -mod test { - use std::sync::Arc; - - use nelson::Mocker; - - use super::*; - - #[derive(Clone)] - pub enum MockUpdateFileStore { - Real(store::UpdateFileStore), - Mock(Arc), - } - - impl MockUpdateFileStore { - pub fn mock(mocker: Mocker) -> Self { - Self::Mock(Arc::new(mocker)) - } - - pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - store::UpdateFileStore::load_dump(src, dst) - } - - pub fn new(path: impl AsRef) -> Result { - store::UpdateFileStore::new(path).map(Self::Real) - } - - pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { - match self { - MockUpdateFileStore::Real(s) => s.new_update(), - MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub fn get_update(&self, uuid: Uuid) -> Result { - match self { - MockUpdateFileStore::Real(s) => s.get_update(uuid), - MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub fn snapshot(&self, uuid: Uuid, dst: impl AsRef) -> Result<()> { - match self { - MockUpdateFileStore::Real(s) => s.snapshot(uuid, dst), - MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef) -> Result<()> { - match self { - MockUpdateFileStore::Real(s) => s.dump(uuid, dump_path), - 
MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub fn get_size(&self, uuid: Uuid) -> Result { - match self { - MockUpdateFileStore::Real(s) => s.get_size(uuid), - MockUpdateFileStore::Mock(_) => todo!(), - } - } - - pub async fn delete(&self, uuid: Uuid) -> Result<()> { - match self { - MockUpdateFileStore::Real(s) => s.delete(uuid).await, - MockUpdateFileStore::Mock(mocker) => unsafe { mocker.get("delete").call(uuid) }, - } - } - } -} diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 5e03e050f..81aeaaa69 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -1,15 +1,45 @@ [package] name = "meilisearch-types" -version = "0.29.2" +version = "0.30.0" authors = ["marin "] edition = "2021" [dependencies] -actix-web = { version = "4.0.1", default-features = false } +actix-web = { version = "4.2.1", default-features = false } +anyhow = "1.0.65" +csv = "1.1.6" +either = { version = "1.6.1", features = ["serde"] } +enum-iterator = "1.1.3" +flate2 = "1.0.24" +fst = "0.4.7" +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.37.0", default-features = false } proptest = { version = "1.0.0", optional = true } proptest-derive = { version = "0.3.0", optional = true } -serde = { version = "1.0.136", features = ["derive"] } -serde_json = "1.0.79" +roaring = { version = "0.10.0", features = ["serde"] } +serde = { version = "1.0.145", features = ["derive"] } +serde_json = "1.0.85" +tar = "0.4.38" +thiserror = "1.0.30" +time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } +tokio = "1.0" +uuid = { version = "1.1.2", features = ["serde", "v4"] } + +[dev-dependencies] +insta = "1.19.1" +meili-snap = { path = "../meili-snap" } +proptest = "1.0.0" +proptest-derive = "0.3.0" [features] +# all specialized tokenizations +default = ["milli/default"] + +# chinese specialized tokenization +chinese = ["milli/chinese"] +# hebrew specialized tokenization +hebrew = ["milli/hebrew"] +# japanese specialized tokenization +japanese = ["milli/japanese"] +# thai specialized tokenization +thai = ["milli/thai"] test-traits = ["proptest", "proptest-derive"] diff --git a/meilisearch-lib/src/compression.rs b/meilisearch-types/src/compression.rs similarity index 89% rename from meilisearch-lib/src/compression.rs rename to meilisearch-types/src/compression.rs index c4747cb21..1d364b815 100644 --- a/meilisearch-lib/src/compression.rs +++ b/meilisearch-types/src/compression.rs @@ -2,7 +2,9 @@ use std::fs::{create_dir_all, File}; use std::io::Write; use std::path::Path; -use flate2::{read::GzDecoder, write::GzEncoder, Compression}; +use flate2::read::GzDecoder; +use flate2::write::GzEncoder; +use flate2::Compression; use tar::{Archive, Builder}; pub fn to_tar_gz(src: impl AsRef, dest: impl AsRef) -> anyhow::Result<()> { diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-types/src/document_formats.rs similarity index 73% rename from meilisearch-lib/src/document_formats.rs rename to meilisearch-types/src/document_formats.rs index ebc98f3fb..42a37eb43 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-types/src/document_formats.rs @@ -3,11 +3,13 @@ use std::fmt::{self, Debug, Display}; use std::io::{self, BufReader, Read, Seek, Write}; use either::Either; -use meilisearch_types::error::{Code, ErrorCode}; -use meilisearch_types::internal_error; use milli::documents::{DocumentsBatchBuilder, Error}; use milli::Object; use serde::Deserialize; +use serde_json::error::Category; + +use 
crate::error::{Code, ErrorCode}; +use crate::internal_error; type Result = std::result::Result; @@ -40,19 +42,33 @@ impl Display for DocumentFormatError { Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e), Self::MalformedPayload(me, b) => match me.borrow() { Error::Json(se) => { + let mut message = match se.classify() { + Category::Data => { + "data are neither an object nor a list of objects".to_string() + } + _ => se.to_string(), + }; + // https://github.com/meilisearch/meilisearch/issues/2107 // The user input maybe insanely long. We need to truncate it. - let mut serde_msg = se.to_string(); let ellipsis = "..."; - if serde_msg.len() > 100 + ellipsis.len() { - serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis); + let trim_input_prefix_len = 50; + let trim_input_suffix_len = 85; + + if message.len() + > trim_input_prefix_len + trim_input_suffix_len + ellipsis.len() + { + message.replace_range( + trim_input_prefix_len..message.len() - trim_input_suffix_len, + ellipsis, + ); } write!( f, "The `{}` payload provided is malformed. `Couldn't serialize document value: {}`.", - b, serde_msg - ) + b, message + ) } _ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me), }, @@ -90,10 +106,7 @@ pub fn read_csv(input: impl Read, writer: impl Write + Seek) -> Result { builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?; let count = builder.documents_count(); - let _ = builder - .into_inner() - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; + let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?; Ok(count as usize) } @@ -104,9 +117,7 @@ pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result let reader = BufReader::new(input); for result in serde_json::Deserializer::from_reader(reader).into_iter() { - let object = result - .map_err(Error::Json) - .map_err(|e| (PayloadType::Ndjson, e))?; + let object = result.map_err(Error::Json).map_err(|e| (PayloadType::Ndjson, e))?; builder .append_json_object(&object) .map_err(Into::into) @@ -114,10 +125,7 @@ pub fn read_ndjson(input: impl Read, writer: impl Write + Seek) -> Result } let count = builder.documents_count(); - let _ = builder - .into_inner() - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; + let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?; Ok(count as usize) } @@ -134,9 +142,8 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { inner: Either, Object>, } - let content: ArrayOrSingleObject = serde_json::from_reader(reader) - .map_err(Error::Json) - .map_err(|e| (PayloadType::Json, e))?; + let content: ArrayOrSingleObject = + serde_json::from_reader(reader).map_err(Error::Json).map_err(|e| (PayloadType::Json, e))?; for object in content.inner.map_right(|o| vec![o]).into_inner() { builder @@ -146,10 +153,7 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { } let count = builder.documents_count(); - let _ = builder - .into_inner() - .map_err(Into::into) - .map_err(DocumentFormatError::Internal)?; + let _ = builder.into_inner().map_err(Into::into).map_err(DocumentFormatError::Internal)?; Ok(count as usize) } diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 56ac65f9e..5c0e1d9b8 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -1,6 +1,9 @@ use std::fmt; -use actix_web::{self as aweb, http::StatusCode, HttpResponseBuilder}; +use actix_web::http::StatusCode; 
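// A standalone sketch of the malformed-payload truncation rule added in
// `document_formats.rs` above: serde messages longer than 50 + 85 + 3 bytes keep only
// their first 50 and last 85 bytes, joined by an ellipsis. The helper name is
// illustrative; the operation is byte-based, exactly like the code above.
fn truncate_serde_message(mut message: String) -> String {
    let ellipsis = "...";
    let (prefix, suffix) = (50, 85);
    if message.len() > prefix + suffix + ellipsis.len() {
        // e.g. a 200-byte message comes out exactly 50 + 3 + 85 = 138 bytes long
        message.replace_range(prefix..message.len() - suffix, ellipsis);
    }
    message
}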
+use actix_web::{self as aweb, HttpResponseBuilder}; +use aweb::rt::task::JoinError; +use milli::heed::{Error as HeedError, MdbError}; use serde::{Deserialize, Serialize}; #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)] @@ -8,10 +11,7 @@ use serde::{Deserialize, Serialize}; #[cfg_attr(feature = "test-traits", derive(proptest_derive::Arbitrary))] pub struct ResponseError { #[serde(skip)] - #[cfg_attr( - feature = "test-traits", - proptest(strategy = "strategy::status_code_strategy()") - )] + #[cfg_attr(feature = "test-traits", proptest(strategy = "strategy::status_code_strategy()"))] code: StatusCode, message: String, #[serde(rename = "code")] @@ -60,9 +60,7 @@ where impl aweb::error::ResponseError for ResponseError { fn error_response(&self) -> aweb::HttpResponse { let json = serde_json::to_vec(self).unwrap(); - HttpResponseBuilder::new(self.status_code()) - .content_type("application/json") - .body(json) + HttpResponseBuilder::new(self.status_code()).content_type("application/json").body(json) } fn status_code(&self) -> StatusCode { @@ -122,6 +120,8 @@ pub enum Code { InvalidIndexUid, InvalidMinWordLengthForTypo, + DuplicateIndexFound, + // invalid state error InvalidState, MissingPrimaryKey, @@ -144,9 +144,17 @@ pub enum Code { InvalidStore, InvalidToken, MissingAuthorizationHeader, + MissingMasterKey, NoSpaceLeftOnDevice, DumpNotFound, + InvalidTaskDateFilter, + InvalidTaskStatusesFilter, + InvalidTaskTypesFilter, + InvalidTaskCanceledByFilter, + InvalidTaskUidsFilter, TaskNotFound, + TaskDeletionWithEmptyQuery, + TaskCancelationWithEmptyQuery, PayloadTooLarge, RetrieveDocument, SearchDocuments, @@ -154,6 +162,8 @@ pub enum Code { DumpAlreadyInProgress, DumpProcessFailed, + // Only used when importing a dump + UnretrievableErrorCode, InvalidContentType, MissingContentType, @@ -220,10 +230,9 @@ impl Code { BadParameter => ErrCode::invalid("bad_parameter", StatusCode::BAD_REQUEST), BadRequest => ErrCode::invalid("bad_request", StatusCode::BAD_REQUEST), - DatabaseSizeLimitReached => ErrCode::internal( - "database_size_limit_reached", - StatusCode::INTERNAL_SERVER_ERROR, - ), + DatabaseSizeLimitReached => { + ErrCode::internal("database_size_limit_reached", StatusCode::INTERNAL_SERVER_ERROR) + } DocumentNotFound => ErrCode::invalid("document_not_found", StatusCode::NOT_FOUND), Internal => ErrCode::internal("internal", StatusCode::INTERNAL_SERVER_ERROR), InvalidGeoField => ErrCode::invalid("invalid_geo_field", StatusCode::BAD_REQUEST), @@ -231,7 +240,31 @@ impl Code { MissingAuthorizationHeader => { ErrCode::authentication("missing_authorization_header", StatusCode::UNAUTHORIZED) } + MissingMasterKey => { + ErrCode::authentication("missing_master_key", StatusCode::UNAUTHORIZED) + } + InvalidTaskDateFilter => { + ErrCode::invalid("invalid_task_date_filter", StatusCode::BAD_REQUEST) + } + InvalidTaskUidsFilter => { + ErrCode::invalid("invalid_task_uids_filter", StatusCode::BAD_REQUEST) + } + InvalidTaskStatusesFilter => { + ErrCode::invalid("invalid_task_statuses_filter", StatusCode::BAD_REQUEST) + } + InvalidTaskTypesFilter => { + ErrCode::invalid("invalid_task_types_filter", StatusCode::BAD_REQUEST) + } + InvalidTaskCanceledByFilter => { + ErrCode::invalid("invalid_task_canceled_by_filter", StatusCode::BAD_REQUEST) + } TaskNotFound => ErrCode::invalid("task_not_found", StatusCode::NOT_FOUND), + TaskDeletionWithEmptyQuery => { + ErrCode::invalid("missing_task_filters", StatusCode::BAD_REQUEST) + } + TaskCancelationWithEmptyQuery => { + ErrCode::invalid("missing_task_filters", 
StatusCode::BAD_REQUEST) + } DumpNotFound => ErrCode::invalid("dump_not_found", StatusCode::NOT_FOUND), NoSpaceLeftOnDevice => { ErrCode::internal("no_space_left_on_device", StatusCode::INTERNAL_SERVER_ERROR) @@ -260,6 +293,10 @@ impl Code { ErrCode::invalid("invalid_content_type", StatusCode::UNSUPPORTED_MEDIA_TYPE) } MissingPayload => ErrCode::invalid("missing_payload", StatusCode::BAD_REQUEST), + // This one can only happen when importing a dump and encountering an unknown code in the task queue. + UnretrievableErrorCode => { + ErrCode::invalid("unretrievable_error_code", StatusCode::BAD_REQUEST) + } // error related to keys ApiKeyNotFound => ErrCode::invalid("api_key_not_found", StatusCode::NOT_FOUND), @@ -283,6 +320,9 @@ impl Code { InvalidMinWordLengthForTypo => { ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST) } + DuplicateIndexFound => { + ErrCode::invalid("duplicate_index_found", StatusCode::BAD_REQUEST) + } } } @@ -316,26 +356,77 @@ struct ErrCode { impl ErrCode { fn authentication(error_name: &'static str, status_code: StatusCode) -> ErrCode { - ErrCode { - status_code, - error_name, - error_type: ErrorType::AuthenticationError, - } + ErrCode { status_code, error_name, error_type: ErrorType::AuthenticationError } } fn internal(error_name: &'static str, status_code: StatusCode) -> ErrCode { - ErrCode { - status_code, - error_name, - error_type: ErrorType::InternalError, - } + ErrCode { status_code, error_name, error_type: ErrorType::InternalError } } fn invalid(error_name: &'static str, status_code: StatusCode) -> ErrCode { - ErrCode { - status_code, - error_name, - error_type: ErrorType::InvalidRequestError, + ErrCode { status_code, error_name, error_type: ErrorType::InvalidRequestError } + } +} + +impl ErrorCode for JoinError { + fn error_code(&self) -> Code { + Code::Internal + } +} + +impl ErrorCode for milli::Error { + fn error_code(&self) -> Code { + use milli::{Error, UserError}; + + match self { + Error::InternalError(_) => Code::Internal, + Error::IoError(_) => Code::Internal, + Error::UserError(ref error) => { + match error { + // TODO: wait for spec for new error codes. + UserError::SerdeJson(_) + | UserError::InvalidLmdbOpenOptions + | UserError::DocumentLimitReached + | UserError::AccessingSoftDeletedDocument { .. } + | UserError::UnknownInternalDocumentId { .. } => Code::Internal, + UserError::InvalidStoreFile => Code::InvalidStore, + UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice, + UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached, + UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, + UserError::InvalidFilter(_) => Code::Filter, + UserError::MissingDocumentId { .. } => Code::MissingDocumentId, + UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { + Code::InvalidDocumentId + } + UserError::MissingPrimaryKey => Code::MissingPrimaryKey, + UserError::PrimaryKeyCannotBeChanged(_) => Code::PrimaryKeyAlreadyPresent, + UserError::SortRankingRuleMissing => Code::Sort, + UserError::InvalidFacetsDistribution { .. } => Code::BadRequest, + UserError::InvalidSortableAttribute { .. } => Code::Sort, + UserError::CriterionError(_) => Code::InvalidRankingRule, + UserError::InvalidGeoField { .. 
} => Code::InvalidGeoField, + UserError::SortError(_) => Code::Sort, + UserError::InvalidMinTypoWordLenSetting(_, _) => { + Code::InvalidMinWordLengthForTypo + } + } + } + } + } +} + +impl ErrorCode for HeedError { + fn error_code(&self) -> Code { + match self { + HeedError::Mdb(MdbError::MapFull) => Code::DatabaseSizeLimitReached, + HeedError::Mdb(MdbError::Invalid) => Code::InvalidStore, + HeedError::Io(_) + | HeedError::Mdb(_) + | HeedError::Encoding + | HeedError::Decoding + | HeedError::InvalidDatabaseTyping + | HeedError::DatabaseClosing + | HeedError::BadOpenOptions => Code::Internal, } } } diff --git a/meilisearch-types/src/index_uid.rs b/meilisearch-types/src/index_uid.rs index a8cb726af..945a57e9e 100644 --- a/meilisearch-types/src/index_uid.rs +++ b/meilisearch-types/src/index_uid.rs @@ -1,8 +1,11 @@ -use serde::{Deserialize, Serialize}; use std::error::Error; use std::fmt; use std::str::FromStr; +use serde::{Deserialize, Serialize}; + +use crate::error::{Code, ErrorCode}; + /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] @@ -38,9 +41,7 @@ impl TryFrom for IndexUid { type Error = IndexUidFormatError; fn try_from(uid: String) -> Result { - if !uid - .chars() - .all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') + if !uid.chars().all(|x| x.is_ascii_alphanumeric() || x == '-' || x == '_') || uid.is_empty() || uid.len() > 400 { @@ -74,12 +75,18 @@ impl fmt::Display for IndexUidFormatError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "invalid index uid `{}`, the uid must be an integer \ - or a string containing only alphanumeric characters \ - a-z A-Z 0-9, hyphens - and underscores _.", + "`{}` is not a valid index uid. 
Index uid can be an \ + integer or a string containing only alphanumeric \ + characters, hyphens (-) and underscores (_).", self.invalid_uid, ) } } impl Error for IndexUidFormatError {} + +impl ErrorCode for IndexUidFormatError { + fn error_code(&self) -> Code { + Code::InvalidIndexUid + } +} diff --git a/meilisearch-types/src/keys.rs b/meilisearch-types/src/keys.rs new file mode 100644 index 000000000..2ec624809 --- /dev/null +++ b/meilisearch-types/src/keys.rs @@ -0,0 +1,390 @@ +use std::hash::Hash; +use std::str::FromStr; + +use enum_iterator::Sequence; +use serde::{Deserialize, Serialize}; +use serde_json::{from_value, Value}; +use time::format_description::well_known::Rfc3339; +use time::macros::{format_description, time}; +use time::{Date, OffsetDateTime, PrimitiveDateTime}; +use uuid::Uuid; + +use crate::error::{Code, ErrorCode}; +use crate::index_uid::{IndexUid, IndexUidFormatError}; +use crate::star_or::StarOr; + +type Result = std::result::Result; + +pub type KeyId = Uuid; + +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct Key { + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + pub uid: KeyId, + pub actions: Vec, + pub indexes: Vec>, + #[serde(with = "time::serde::rfc3339::option")] + pub expires_at: Option, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339")] + pub updated_at: OffsetDateTime, +} + +impl Key { + pub fn create_from_value(value: Value) -> Result { + let name = match value.get("name") { + None | Some(Value::Null) => None, + Some(des) => from_value(des.clone()) + .map(Some) + .map_err(|_| Error::InvalidApiKeyName(des.clone()))?, + }; + + let description = match value.get("description") { + None | Some(Value::Null) => None, + Some(des) => from_value(des.clone()) + .map(Some) + .map_err(|_| Error::InvalidApiKeyDescription(des.clone()))?, + }; + + let uid = value.get("uid").map_or_else( + || Ok(Uuid::new_v4()), + |uid| from_value(uid.clone()).map_err(|_| Error::InvalidApiKeyUid(uid.clone())), + )?; + + let actions = value + .get("actions") + .map(|act| { + from_value(act.clone()).map_err(|_| Error::InvalidApiKeyActions(act.clone())) + }) + .ok_or(Error::MissingParameter("actions"))??; + + let indexes = value + .get("indexes") + .map(|ind| { + from_value::>(ind.clone()) + // If it's not a vec of string, return an API key parsing error. + .map_err(|_| Error::InvalidApiKeyIndexes(ind.clone())) + .and_then(|ind| { + ind.into_iter() + // If it's not a valid Index uid, return an Index Uid parsing error. 
+ .map(|i| StarOr::::from_str(&i).map_err(Error::from)) + .collect() + }) + }) + .ok_or(Error::MissingParameter("indexes"))??; + + let expires_at = value + .get("expiresAt") + .map(parse_expiration_date) + .ok_or(Error::MissingParameter("expiresAt"))??; + + let created_at = OffsetDateTime::now_utc(); + let updated_at = created_at; + + Ok(Self { name, description, uid, actions, indexes, expires_at, created_at, updated_at }) + } + + pub fn update_from_value(&mut self, value: Value) -> Result<()> { + if let Some(des) = value.get("description") { + let des = + from_value(des.clone()).map_err(|_| Error::InvalidApiKeyDescription(des.clone())); + self.description = des?; + } + + if let Some(des) = value.get("name") { + let des = from_value(des.clone()).map_err(|_| Error::InvalidApiKeyName(des.clone())); + self.name = des?; + } + + if value.get("uid").is_some() { + return Err(Error::ImmutableField("uid".to_string())); + } + + if value.get("actions").is_some() { + return Err(Error::ImmutableField("actions".to_string())); + } + + if value.get("indexes").is_some() { + return Err(Error::ImmutableField("indexes".to_string())); + } + + if value.get("expiresAt").is_some() { + return Err(Error::ImmutableField("expiresAt".to_string())); + } + + if value.get("createdAt").is_some() { + return Err(Error::ImmutableField("createdAt".to_string())); + } + + if value.get("updatedAt").is_some() { + return Err(Error::ImmutableField("updatedAt".to_string())); + } + + self.updated_at = OffsetDateTime::now_utc(); + + Ok(()) + } + + pub fn default_admin() -> Self { + let now = OffsetDateTime::now_utc(); + let uid = Uuid::new_v4(); + Self { + name: Some("Default Admin API Key".to_string()), + description: Some("Use it for anything that is not a search operation. Caution! Do not expose it on a public frontend".to_string()), + uid, + actions: vec![Action::All], + indexes: vec![StarOr::Star], + expires_at: None, + created_at: now, + updated_at: now, + } + } + + pub fn default_search() -> Self { + let now = OffsetDateTime::now_utc(); + let uid = Uuid::new_v4(); + Self { + name: Some("Default Search API Key".to_string()), + description: Some("Use it to search from the frontend".to_string()), + uid, + actions: vec![Action::Search], + indexes: vec![StarOr::Star], + expires_at: None, + created_at: now, + updated_at: now, + } + } +} + +fn parse_expiration_date(value: &Value) -> Result> { + match value { + Value::String(string) => OffsetDateTime::parse(string, &Rfc3339) + .or_else(|_| { + PrimitiveDateTime::parse( + string, + format_description!( + "[year repr:full base:calendar]-[month repr:numerical]-[day]T[hour]:[minute]:[second]" + ), + ).map(|datetime| datetime.assume_utc()) + }) + .or_else(|_| { + PrimitiveDateTime::parse( + string, + format_description!( + "[year repr:full base:calendar]-[month repr:numerical]-[day] [hour]:[minute]:[second]" + ), + ).map(|datetime| datetime.assume_utc()) + }) + .or_else(|_| { + Date::parse(string, format_description!( + "[year repr:full base:calendar]-[month repr:numerical]-[day]" + )).map(|date| PrimitiveDateTime::new(date, time!(00:00)).assume_utc()) + }) + .map_err(|_| Error::InvalidApiKeyExpiresAt(value.clone())) + // check if the key is already expired. 
+ .and_then(|d| { + if d > OffsetDateTime::now_utc() { + Ok(d) + } else { + Err(Error::InvalidApiKeyExpiresAt(value.clone())) + } + }) + .map(Option::Some), + Value::Null => Ok(None), + _otherwise => Err(Error::InvalidApiKeyExpiresAt(value.clone())), + } +} + +#[derive(Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, Sequence)] +#[repr(u8)] +pub enum Action { + #[serde(rename = "*")] + All = 0, + #[serde(rename = "search")] + Search, + #[serde(rename = "documents.*")] + DocumentsAll, + #[serde(rename = "documents.add")] + DocumentsAdd, + #[serde(rename = "documents.get")] + DocumentsGet, + #[serde(rename = "documents.delete")] + DocumentsDelete, + #[serde(rename = "indexes.*")] + IndexesAll, + #[serde(rename = "indexes.create")] + IndexesAdd, + #[serde(rename = "indexes.get")] + IndexesGet, + #[serde(rename = "indexes.update")] + IndexesUpdate, + #[serde(rename = "indexes.delete")] + IndexesDelete, + #[serde(rename = "indexes.swap")] + IndexesSwap, + #[serde(rename = "tasks.*")] + TasksAll, + #[serde(rename = "tasks.cancel")] + TasksCancel, + #[serde(rename = "tasks.delete")] + TasksDelete, + #[serde(rename = "tasks.get")] + TasksGet, + #[serde(rename = "settings.*")] + SettingsAll, + #[serde(rename = "settings.get")] + SettingsGet, + #[serde(rename = "settings.update")] + SettingsUpdate, + #[serde(rename = "stats.*")] + StatsAll, + #[serde(rename = "stats.get")] + StatsGet, + #[serde(rename = "metrics.*")] + MetricsAll, + #[serde(rename = "metrics.get")] + MetricsGet, + #[serde(rename = "dumps.*")] + DumpsAll, + #[serde(rename = "dumps.create")] + DumpsCreate, + #[serde(rename = "version")] + Version, + #[serde(rename = "keys.create")] + KeysAdd, + #[serde(rename = "keys.get")] + KeysGet, + #[serde(rename = "keys.update")] + KeysUpdate, + #[serde(rename = "keys.delete")] + KeysDelete, +} + +impl Action { + pub const fn from_repr(repr: u8) -> Option { + use actions::*; + match repr { + ALL => Some(Self::All), + SEARCH => Some(Self::Search), + DOCUMENTS_ALL => Some(Self::DocumentsAll), + DOCUMENTS_ADD => Some(Self::DocumentsAdd), + DOCUMENTS_GET => Some(Self::DocumentsGet), + DOCUMENTS_DELETE => Some(Self::DocumentsDelete), + INDEXES_ALL => Some(Self::IndexesAll), + INDEXES_CREATE => Some(Self::IndexesAdd), + INDEXES_GET => Some(Self::IndexesGet), + INDEXES_UPDATE => Some(Self::IndexesUpdate), + INDEXES_DELETE => Some(Self::IndexesDelete), + INDEXES_SWAP => Some(Self::IndexesSwap), + TASKS_ALL => Some(Self::TasksAll), + TASKS_CANCEL => Some(Self::TasksCancel), + TASKS_DELETE => Some(Self::TasksDelete), + TASKS_GET => Some(Self::TasksGet), + SETTINGS_ALL => Some(Self::SettingsAll), + SETTINGS_GET => Some(Self::SettingsGet), + SETTINGS_UPDATE => Some(Self::SettingsUpdate), + STATS_ALL => Some(Self::StatsAll), + STATS_GET => Some(Self::StatsGet), + METRICS_ALL => Some(Self::MetricsAll), + METRICS_GET => Some(Self::MetricsGet), + DUMPS_ALL => Some(Self::DumpsAll), + DUMPS_CREATE => Some(Self::DumpsCreate), + VERSION => Some(Self::Version), + KEYS_CREATE => Some(Self::KeysAdd), + KEYS_GET => Some(Self::KeysGet), + KEYS_UPDATE => Some(Self::KeysUpdate), + KEYS_DELETE => Some(Self::KeysDelete), + _otherwise => None, + } + } + + pub const fn repr(&self) -> u8 { + *self as u8 + } +} + +pub mod actions { + use super::Action::*; + + pub(crate) const ALL: u8 = All.repr(); + pub const SEARCH: u8 = Search.repr(); + pub const DOCUMENTS_ALL: u8 = DocumentsAll.repr(); + pub const DOCUMENTS_ADD: u8 = DocumentsAdd.repr(); + pub const DOCUMENTS_GET: u8 = DocumentsGet.repr(); + pub const 
DOCUMENTS_DELETE: u8 = DocumentsDelete.repr(); + pub const INDEXES_ALL: u8 = IndexesAll.repr(); + pub const INDEXES_CREATE: u8 = IndexesAdd.repr(); + pub const INDEXES_GET: u8 = IndexesGet.repr(); + pub const INDEXES_UPDATE: u8 = IndexesUpdate.repr(); + pub const INDEXES_DELETE: u8 = IndexesDelete.repr(); + pub const INDEXES_SWAP: u8 = IndexesSwap.repr(); + pub const TASKS_ALL: u8 = TasksAll.repr(); + pub const TASKS_CANCEL: u8 = TasksCancel.repr(); + pub const TASKS_DELETE: u8 = TasksDelete.repr(); + pub const TASKS_GET: u8 = TasksGet.repr(); + pub const SETTINGS_ALL: u8 = SettingsAll.repr(); + pub const SETTINGS_GET: u8 = SettingsGet.repr(); + pub const SETTINGS_UPDATE: u8 = SettingsUpdate.repr(); + pub const STATS_ALL: u8 = StatsAll.repr(); + pub const STATS_GET: u8 = StatsGet.repr(); + pub const METRICS_ALL: u8 = MetricsAll.repr(); + pub const METRICS_GET: u8 = MetricsGet.repr(); + pub const DUMPS_ALL: u8 = DumpsAll.repr(); + pub const DUMPS_CREATE: u8 = DumpsCreate.repr(); + pub const VERSION: u8 = Version.repr(); + pub const KEYS_CREATE: u8 = KeysAdd.repr(); + pub const KEYS_GET: u8 = KeysGet.repr(); + pub const KEYS_UPDATE: u8 = KeysUpdate.repr(); + pub const KEYS_DELETE: u8 = KeysDelete.repr(); +} + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("`{0}` field is mandatory.")] + MissingParameter(&'static str), + #[error("`actions` field value `{0}` is invalid. It should be an array of string representing action names.")] + InvalidApiKeyActions(Value), + #[error("`indexes` field value `{0}` is invalid. It should be an array of string representing index names.")] + InvalidApiKeyIndexes(Value), + #[error("{0}")] + InvalidApiKeyIndexUid(IndexUidFormatError), + #[error("`expiresAt` field value `{0}` is invalid. It should follow the RFC 3339 format to represents a date or datetime in the future or specified as a null value. e.g. 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.")] + InvalidApiKeyExpiresAt(Value), + #[error("`description` field value `{0}` is invalid. It should be a string or specified as a null value.")] + InvalidApiKeyDescription(Value), + #[error( + "`name` field value `{0}` is invalid. It should be a string or specified as a null value." + )] + InvalidApiKeyName(Value), + #[error("`uid` field value `{0}` is invalid. 
It should be a valid UUID v4 string or omitted.")] + InvalidApiKeyUid(Value), + #[error("The `{0}` field cannot be modified for the given resource.")] + ImmutableField(String), +} + +impl From for Error { + fn from(e: IndexUidFormatError) -> Self { + Self::InvalidApiKeyIndexUid(e) + } +} + +impl ErrorCode for Error { + fn error_code(&self) -> Code { + match self { + Self::MissingParameter(_) => Code::MissingParameter, + Self::InvalidApiKeyActions(_) => Code::InvalidApiKeyActions, + Self::InvalidApiKeyIndexes(_) | Self::InvalidApiKeyIndexUid(_) => { + Code::InvalidApiKeyIndexes + } + Self::InvalidApiKeyExpiresAt(_) => Code::InvalidApiKeyExpiresAt, + Self::InvalidApiKeyDescription(_) => Code::InvalidApiKeyDescription, + Self::InvalidApiKeyName(_) => Code::InvalidApiKeyName, + Self::InvalidApiKeyUid(_) => Code::InvalidApiKeyUid, + Self::ImmutableField(_) => Code::ImmutableField, + } + } +} diff --git a/meilisearch-types/src/lib.rs b/meilisearch-types/src/lib.rs index 2d685c2dc..c7f7ca7f5 100644 --- a/meilisearch-types/src/lib.rs +++ b/meilisearch-types/src/lib.rs @@ -1,3 +1,17 @@ +pub mod compression; +pub mod document_formats; pub mod error; pub mod index_uid; +pub mod keys; +pub mod settings; pub mod star_or; +pub mod tasks; +pub mod versioning; + +pub use milli; +pub use milli::{heed, Index}; +use uuid::Uuid; +pub use versioning::VERSION_FILE_NAME; + +pub type Document = serde_json::Map; +pub type InstanceUid = Uuid; diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-types/src/settings.rs similarity index 78% rename from meilisearch-lib/src/index/updates.rs rename to meilisearch-types/src/settings.rs index b6f601753..3369cfdfb 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-types/src/settings.rs @@ -2,18 +2,14 @@ use std::collections::{BTreeMap, BTreeSet}; use std::marker::PhantomData; use std::num::NonZeroUsize; -use log::{debug, info, trace}; -use milli::documents::DocumentsBatchReader; -use milli::update::{ - DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, - Setting, -}; +use fst::IntoStreamer; +use milli::update::Setting; +use milli::{Index, DEFAULT_VALUES_PER_FACET}; use serde::{Deserialize, Serialize, Serializer}; -use uuid::Uuid; -use super::error::{IndexError, Result}; -use super::index::{Index, IndexMeta}; -use crate::update_file_store::UpdateFileStore; +/// The maximimum number of results that the engine +/// will be able to return in one search call. 
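// A hedged sketch, from a caller's point of view, of the `Key::create_from_value` API
// added in `keys.rs` above. All field values are hypothetical. `actions`, `indexes` and
// `expiresAt` are mandatory; `expiresAt` may be null, otherwise it must be an RFC 3339
// datetime, `YYYY-MM-DDTHH:MM:SS`, `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD`, and lie in the
// future. `uid`, `name` and `description` are optional (a missing `uid` is generated).
use meilisearch_types::keys::{Error, Key};
use serde_json::json;

fn example_search_key() -> Result<Key, Error> {
    Key::create_from_value(json!({
        "name": "example search key",
        "actions": ["search"],
        "indexes": ["movies"],
        "expiresAt": "2030-01-01"
    }))
}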
+pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; fn serialize_with_wildcard( field: &Setting>, @@ -38,7 +34,7 @@ pub struct Checked; pub struct Unchecked; #[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] pub struct MinWordSizeTyposSetting { @@ -51,7 +47,7 @@ pub struct MinWordSizeTyposSetting { } #[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] pub struct TypoSettings { @@ -70,7 +66,7 @@ pub struct TypoSettings { } #[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] pub struct FacetingSettings { @@ -80,7 +76,7 @@ pub struct FacetingSettings { } #[cfg_attr(test, derive(proptest_derive::Arbitrary))] -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] pub struct PaginationSettings { @@ -92,7 +88,7 @@ pub struct PaginationSettings { /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a /// call to `check` will return a `Settings` from a `Settings`. -#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] #[serde(bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>"))] @@ -246,126 +242,6 @@ pub struct Facets { pub min_level_size: Option, } -impl Index { - fn update_primary_key_txn<'a, 'b>( - &'a self, - txn: &mut milli::heed::RwTxn<'a, 'b>, - primary_key: String, - ) -> Result { - let mut builder = milli::update::Settings::new(txn, self, self.indexer_config.as_ref()); - builder.set_primary_key(primary_key); - builder.execute(|_| ())?; - let meta = IndexMeta::new_txn(self, txn)?; - - Ok(meta) - } - - pub fn update_primary_key(&self, primary_key: String) -> Result { - let mut txn = self.write_txn()?; - let res = self.update_primary_key_txn(&mut txn, primary_key)?; - txn.commit()?; - - Ok(res) - } - - /// Deletes `ids` from the index, and returns how many documents were deleted. 
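// A short sketch of the `Checked`/`Unchecked` typestate described above: user input is
// handled as `Settings<Unchecked>`, and only the value returned by `check()` can reach
// code that expects validated settings. `apply` is a hypothetical consumer.
fn apply(_settings: &Settings<Checked>) { /* only validated settings get this far */ }

fn handle_user_settings(unchecked: Settings<Unchecked>) {
    let checked: Settings<Checked> = unchecked.check();
    apply(&checked);
}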
- pub fn delete_documents(&self, ids: &[String]) -> Result { - let mut txn = self.write_txn()?; - let mut builder = milli::update::DeleteDocuments::new(&mut txn, self)?; - - // We ignore unexisting document ids - ids.iter().for_each(|id| { - builder.delete_external_id(id); - }); - - let deleted = builder.execute()?; - - txn.commit()?; - - Ok(deleted) - } - - pub fn clear_documents(&self) -> Result<()> { - let mut txn = self.write_txn()?; - milli::update::ClearDocuments::new(&mut txn, self).execute()?; - txn.commit()?; - - Ok(()) - } - - pub fn update_documents( - &self, - method: IndexDocumentsMethod, - primary_key: Option, - file_store: UpdateFileStore, - contents: impl IntoIterator, - ) -> Result>> { - trace!("performing document addition"); - let mut txn = self.write_txn()?; - - if let Some(primary_key) = primary_key { - if self.primary_key(&txn)?.is_none() { - self.update_primary_key_txn(&mut txn, primary_key)?; - } - } - - let config = IndexDocumentsConfig { - update_method: method, - ..Default::default() - }; - - let indexing_callback = |indexing_step| debug!("update: {:?}", indexing_step); - let mut builder = milli::update::IndexDocuments::new( - &mut txn, - self, - self.indexer_config.as_ref(), - config, - indexing_callback, - )?; - - let mut results = Vec::new(); - for content_uuid in contents.into_iter() { - let content_file = file_store.get_update(content_uuid)?; - let reader = DocumentsBatchReader::from_reader(content_file)?; - let (new_builder, user_result) = builder.add_documents(reader)?; - builder = new_builder; - - let user_result = match user_result { - Ok(count) => Ok(DocumentAdditionResult { - indexed_documents: count, - number_of_documents: count, - }), - Err(e) => Err(IndexError::from(e)), - }; - - results.push(user_result); - } - - if results.iter().any(Result::is_ok) { - let addition = builder.execute()?; - txn.commit()?; - info!("document addition done: {:?}", addition); - } - - Ok(results) - } - - pub fn update_settings(&self, settings: &Settings) -> Result<()> { - // We must use the write transaction of the update here. - let mut txn = self.write_txn()?; - let mut builder = - milli::update::Settings::new(&mut txn, self, self.indexer_config.as_ref()); - - apply_settings_to_builder(settings, &mut builder); - - builder.execute(|indexing_step| debug!("update: {:?}", indexing_step))?; - - txn.commit()?; - - Ok(()) - } -} - pub fn apply_settings_to_builder( settings: &Settings, builder: &mut milli::update::Settings, @@ -496,6 +372,96 @@ pub fn apply_settings_to_builder( } } +pub fn settings( + index: &Index, + rtxn: &crate::heed::RoTxn, +) -> Result, milli::Error> { + let displayed_attributes = + index.displayed_fields(rtxn)?.map(|fields| fields.into_iter().map(String::from).collect()); + + let searchable_attributes = index + .user_defined_searchable_fields(rtxn)? + .map(|fields| fields.into_iter().map(String::from).collect()); + + let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect(); + + let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect(); + + let criteria = index.criteria(rtxn)?.into_iter().map(|c| c.to_string()).collect(); + + let stop_words = index + .stop_words(rtxn)? + .map(|stop_words| -> Result, milli::Error> { + Ok(stop_words.stream().into_strs()?.into_iter().collect()) + }) + .transpose()? + .unwrap_or_default(); + let distinct_field = index.distinct_field(rtxn)?.map(String::from); + + // in milli each word in the synonyms map were split on their separator. 
Since we lost + // this information we are going to put space between words. + let synonyms = index + .synonyms(rtxn)? + .iter() + .map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect())) + .collect(); + + let min_typo_word_len = MinWordSizeTyposSetting { + one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?), + two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?), + }; + + let disabled_words = match index.exact_words(rtxn)? { + Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(), + None => BTreeSet::new(), + }; + + let disabled_attributes = index.exact_attributes(rtxn)?.into_iter().map(String::from).collect(); + + let typo_tolerance = TypoSettings { + enabled: Setting::Set(index.authorize_typos(rtxn)?), + min_word_size_for_typos: Setting::Set(min_typo_word_len), + disable_on_words: Setting::Set(disabled_words), + disable_on_attributes: Setting::Set(disabled_attributes), + }; + + let faceting = FacetingSettings { + max_values_per_facet: Setting::Set( + index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET), + ), + }; + + let pagination = PaginationSettings { + max_total_hits: Setting::Set( + index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS), + ), + }; + + Ok(Settings { + displayed_attributes: match displayed_attributes { + Some(attrs) => Setting::Set(attrs), + None => Setting::Reset, + }, + searchable_attributes: match searchable_attributes { + Some(attrs) => Setting::Set(attrs), + None => Setting::Reset, + }, + filterable_attributes: Setting::Set(filterable_attributes), + sortable_attributes: Setting::Set(sortable_attributes), + ranking_rules: Setting::Set(criteria), + stop_words: Setting::Set(stop_words), + distinct_attribute: match distinct_field { + Some(field) => Setting::Set(field), + None => Setting::Reset, + }, + synonyms: Setting::Set(synonyms), + typo_tolerance: Setting::Set(typo_tolerance), + faceting: Setting::Set(faceting), + pagination: Setting::Set(pagination), + _kind: PhantomData, + }) +} + #[cfg(test)] pub(crate) mod test { use proptest::prelude::*; @@ -503,11 +469,7 @@ pub(crate) mod test { use super::*; pub(super) fn setting_strategy() -> impl Strategy> { - prop_oneof![ - Just(Setting::NotSet), - Just(Setting::Reset), - any::().prop_map(Setting::Set) - ] + prop_oneof![Just(Setting::NotSet), Just(Setting::Reset), any::().prop_map(Setting::Set)] } #[test] @@ -530,10 +492,7 @@ pub(crate) mod test { let checked = settings.clone().check(); assert_eq!(settings.displayed_attributes, checked.displayed_attributes); - assert_eq!( - settings.searchable_attributes, - checked.searchable_attributes - ); + assert_eq!(settings.searchable_attributes, checked.searchable_attributes); // test wildcard // test no changes diff --git a/meilisearch-types/src/star_or.rs b/meilisearch-types/src/star_or.rs index 02c9c3524..e89ba6b0e 100644 --- a/meilisearch-types/src/star_or.rs +++ b/meilisearch-types/src/star_or.rs @@ -1,13 +1,14 @@ -use serde::de::Visitor; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt::{Display, Formatter}; use std::marker::PhantomData; use std::ops::Deref; use std::str::FromStr; +use serde::de::Visitor; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + /// A type that tries to match either a star (*) or /// any other thing that implements `FromStr`. 
-#[derive(Debug)] +#[derive(Debug, Clone)] pub enum StarOr { Star, Other(T), @@ -121,9 +122,10 @@ where #[cfg(test)] mod tests { - use super::*; use serde_json::{json, Value}; + use super::*; + #[test] fn star_or_serde_roundtrip() { fn roundtrip(content: Value, expected: StarOr) { diff --git a/meilisearch-types/src/tasks.rs b/meilisearch-types/src/tasks.rs new file mode 100644 index 000000000..ceddbd51c --- /dev/null +++ b/meilisearch-types/src/tasks.rs @@ -0,0 +1,563 @@ +use std::collections::HashSet; +use std::fmt::{Display, Write}; +use std::str::FromStr; + +use enum_iterator::Sequence; +use milli::update::IndexDocumentsMethod; +use roaring::RoaringBitmap; +use serde::{Deserialize, Serialize, Serializer}; +use time::{Duration, OffsetDateTime}; +use uuid::Uuid; + +use crate::error::{Code, ResponseError}; +use crate::keys::Key; +use crate::settings::{Settings, Unchecked}; +use crate::InstanceUid; + +pub type TaskId = u32; + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct Task { + pub uid: TaskId, + + #[serde(with = "time::serde::rfc3339")] + pub enqueued_at: OffsetDateTime, + #[serde(with = "time::serde::rfc3339::option")] + pub started_at: Option, + #[serde(with = "time::serde::rfc3339::option")] + pub finished_at: Option, + + pub error: Option, + pub canceled_by: Option, + pub details: Option
, + + pub status: Status, + pub kind: KindWithContent, +} + +impl Task { + pub fn index_uid(&self) -> Option<&str> { + use KindWithContent::*; + + match &self.kind { + DumpCreation { .. } + | SnapshotCreation + | TaskCancelation { .. } + | TaskDeletion { .. } + | IndexSwap { .. } => None, + DocumentAdditionOrUpdate { index_uid, .. } + | DocumentDeletion { index_uid, .. } + | DocumentClear { index_uid } + | SettingsUpdate { index_uid, .. } + | IndexCreation { index_uid, .. } + | IndexUpdate { index_uid, .. } + | IndexDeletion { index_uid } => Some(index_uid), + } + } + + /// Return the list of indexes updated by this tasks. + pub fn indexes(&self) -> Vec<&str> { + self.kind.indexes() + } + + /// Return the content-uuid if there is one + pub fn content_uuid(&self) -> Option { + match self.kind { + KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file), + KindWithContent::DocumentDeletion { .. } + | KindWithContent::DocumentClear { .. } + | KindWithContent::SettingsUpdate { .. } + | KindWithContent::IndexDeletion { .. } + | KindWithContent::IndexCreation { .. } + | KindWithContent::IndexUpdate { .. } + | KindWithContent::IndexSwap { .. } + | KindWithContent::TaskCancelation { .. } + | KindWithContent::TaskDeletion { .. } + | KindWithContent::DumpCreation { .. } + | KindWithContent::SnapshotCreation => None, + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub enum KindWithContent { + DocumentAdditionOrUpdate { + index_uid: String, + primary_key: Option, + method: IndexDocumentsMethod, + content_file: Uuid, + documents_count: u64, + allow_index_creation: bool, + }, + DocumentDeletion { + index_uid: String, + documents_ids: Vec, + }, + DocumentClear { + index_uid: String, + }, + SettingsUpdate { + index_uid: String, + new_settings: Box>, + is_deletion: bool, + allow_index_creation: bool, + }, + IndexDeletion { + index_uid: String, + }, + IndexCreation { + index_uid: String, + primary_key: Option, + }, + IndexUpdate { + index_uid: String, + primary_key: Option, + }, + IndexSwap { + swaps: Vec, + }, + TaskCancelation { + query: String, + tasks: RoaringBitmap, + }, + TaskDeletion { + query: String, + tasks: RoaringBitmap, + }, + DumpCreation { + keys: Vec, + instance_uid: Option, + }, + SnapshotCreation, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct IndexSwap { + pub indexes: (String, String), +} + +impl KindWithContent { + pub fn as_kind(&self) -> Kind { + match self { + KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate, + KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion, + KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion, + KindWithContent::SettingsUpdate { .. } => Kind::SettingsUpdate, + KindWithContent::IndexCreation { .. } => Kind::IndexCreation, + KindWithContent::IndexDeletion { .. } => Kind::IndexDeletion, + KindWithContent::IndexUpdate { .. } => Kind::IndexUpdate, + KindWithContent::IndexSwap { .. } => Kind::IndexSwap, + KindWithContent::TaskCancelation { .. } => Kind::TaskCancelation, + KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion, + KindWithContent::DumpCreation { .. } => Kind::DumpCreation, + KindWithContent::SnapshotCreation => Kind::SnapshotCreation, + } + } + + pub fn indexes(&self) -> Vec<&str> { + use KindWithContent::*; + + match self { + DumpCreation { .. } + | SnapshotCreation + | TaskCancelation { .. } + | TaskDeletion { .. 
} => vec![], + DocumentAdditionOrUpdate { index_uid, .. } + | DocumentDeletion { index_uid, .. } + | DocumentClear { index_uid } + | SettingsUpdate { index_uid, .. } + | IndexCreation { index_uid, .. } + | IndexUpdate { index_uid, .. } + | IndexDeletion { index_uid } => vec![index_uid], + IndexSwap { swaps } => { + let mut indexes = HashSet::<&str>::default(); + for swap in swaps { + indexes.insert(swap.indexes.0.as_str()); + indexes.insert(swap.indexes.1.as_str()); + } + indexes.into_iter().collect() + } + } + } + + /// Returns the default `Details` that correspond to this `KindWithContent`, + /// `None` if it cannot be generated. + pub fn default_details(&self) -> Option
+
+impl From<&KindWithContent> for Option<Details> {
+    fn from(kind: &KindWithContent) -> Self {
+        match kind {
+            KindWithContent::DocumentAdditionOrUpdate { documents_count, .. } => {
+                Some(Details::DocumentAdditionOrUpdate {
+                    received_documents: *documents_count,
+                    indexed_documents: None,
+                })
+            }
+            KindWithContent::DocumentDeletion { .. } => None,
+            KindWithContent::DocumentClear { .. } => None,
+            KindWithContent::SettingsUpdate { new_settings, .. } => {
+                Some(Details::SettingsUpdate { settings: new_settings.clone() })
+            }
+            KindWithContent::IndexDeletion { .. } => None,
+            KindWithContent::IndexCreation { primary_key, .. } => {
+                Some(Details::IndexInfo { primary_key: primary_key.clone() })
+            }
+            KindWithContent::IndexUpdate { primary_key, .. } => {
+                Some(Details::IndexInfo { primary_key: primary_key.clone() })
+            }
+            KindWithContent::IndexSwap { .. } => None,
+            KindWithContent::TaskCancelation { query, tasks } => Some(Details::TaskCancelation {
+                matched_tasks: tasks.len(),
+                canceled_tasks: None,
+                original_filter: query.clone(),
+            }),
+            KindWithContent::TaskDeletion { query, tasks } => Some(Details::TaskDeletion {
+                matched_tasks: tasks.len(),
+                deleted_tasks: None,
+                original_filter: query.clone(),
+            }),
+            KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
+            KindWithContent::SnapshotCreation => None,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence)]
+#[serde(rename_all = "camelCase")]
+pub enum Status {
+    Enqueued,
+    Processing,
+    Succeeded,
+    Failed,
+    Canceled,
+}
+
+impl Display for Status {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Status::Enqueued => write!(f, "enqueued"),
+            Status::Processing => write!(f, "processing"),
+            Status::Succeeded => write!(f, "succeeded"),
+            Status::Failed => write!(f, "failed"),
+            Status::Canceled => write!(f, "canceled"),
+        }
+    }
+}
+
+impl FromStr for Status {
+    type Err = ResponseError;
+
+    fn from_str(status: &str) -> Result<Self, Self::Err> {
+        if status.eq_ignore_ascii_case("enqueued") {
+            Ok(Status::Enqueued)
+        } else if status.eq_ignore_ascii_case("processing") {
+            Ok(Status::Processing)
+        } else if status.eq_ignore_ascii_case("succeeded") {
+            Ok(Status::Succeeded)
+        } else if status.eq_ignore_ascii_case("failed") {
+            Ok(Status::Failed)
+        } else if status.eq_ignore_ascii_case("canceled") {
+            Ok(Status::Canceled)
+        } else {
+            Err(ResponseError::from_msg(
+                format!(
+                    "`{}` is not a status. Available status are {}.",
+                    status,
+                    enum_iterator::all::<Status>()
+                        .map(|s| format!("`{s}`"))
+                        .collect::<Vec<String>>()
+                        .join(", ")
+                ),
+                Code::BadRequest,
+            ))
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Sequence)]
+#[serde(rename_all = "camelCase")]
+pub enum Kind {
+    DocumentAdditionOrUpdate,
+    DocumentDeletion,
+    SettingsUpdate,
+    IndexCreation,
+    IndexDeletion,
+    IndexUpdate,
+    IndexSwap,
+    TaskCancelation,
+    TaskDeletion,
+    DumpCreation,
+    SnapshotCreation,
+}
+
+impl Kind {
+    pub fn related_to_one_index(&self) -> bool {
+        match self {
+            Kind::DocumentAdditionOrUpdate
+            | Kind::DocumentDeletion
+            | Kind::SettingsUpdate
+            | Kind::IndexCreation
+            | Kind::IndexDeletion
+            | Kind::IndexUpdate => true,
+            Kind::IndexSwap
+            | Kind::TaskCancelation
+            | Kind::TaskDeletion
+            | Kind::DumpCreation
+            | Kind::SnapshotCreation => false,
+        }
+    }
+}
+impl Display for Kind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
+            Kind::DocumentDeletion => write!(f, "documentDeletion"),
+            Kind::SettingsUpdate => write!(f, "settingsUpdate"),
+            Kind::IndexCreation => write!(f, "indexCreation"),
+            Kind::IndexDeletion => write!(f, "indexDeletion"),
+            Kind::IndexUpdate => write!(f, "indexUpdate"),
+            Kind::IndexSwap => write!(f, "indexSwap"),
+            Kind::TaskCancelation => write!(f, "taskCancelation"),
+            Kind::TaskDeletion => write!(f, "taskDeletion"),
+            Kind::DumpCreation => write!(f, "dumpCreation"),
+            Kind::SnapshotCreation => write!(f, "snapshotCreation"),
+        }
+    }
+}
+impl FromStr for Kind {
+    type Err = ResponseError;
+
+    fn from_str(kind: &str) -> Result<Self, Self::Err> {
+        if kind.eq_ignore_ascii_case("indexCreation") {
+            Ok(Kind::IndexCreation)
+        } else if kind.eq_ignore_ascii_case("indexUpdate") {
+            Ok(Kind::IndexUpdate)
+        } else if kind.eq_ignore_ascii_case("indexSwap") {
+            Ok(Kind::IndexSwap)
+        } else if kind.eq_ignore_ascii_case("indexDeletion") {
+            Ok(Kind::IndexDeletion)
+        } else if kind.eq_ignore_ascii_case("documentAdditionOrUpdate") {
+            Ok(Kind::DocumentAdditionOrUpdate)
+        } else if kind.eq_ignore_ascii_case("documentDeletion") {
+            Ok(Kind::DocumentDeletion)
+        } else if kind.eq_ignore_ascii_case("settingsUpdate") {
+            Ok(Kind::SettingsUpdate)
+        } else if kind.eq_ignore_ascii_case("taskCancelation") {
+            Ok(Kind::TaskCancelation)
+        } else if kind.eq_ignore_ascii_case("taskDeletion") {
+            Ok(Kind::TaskDeletion)
+        } else if kind.eq_ignore_ascii_case("dumpCreation") {
+            Ok(Kind::DumpCreation)
+        } else if kind.eq_ignore_ascii_case("snapshotCreation") {
+            Ok(Kind::SnapshotCreation)
+        } else {
+            Err(ResponseError::from_msg(
+                format!(
+                    "`{}` is not a type. Available types are {}.",
+                    kind,
+                    enum_iterator::all::<Kind>()
+                        .map(|k| format!(
+                            "`{}`",
+                            // By default serde is going to insert `"` around the value.
+                            serde_json::to_string(&k).unwrap().trim_matches('"')
+                        ))
+                        .collect::<Vec<String>>()
+                        .join(", ")
+                ),
+                Code::BadRequest,
+            ))
+        }
+    }
+}
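Aside, for illustration only and not part of the patch: both `FromStr` implementations accept their values case-insensitively and reject anything else with a `Code::BadRequest` response error listing the accepted values. A small sketch, assuming the same `meilisearch_types::tasks` path as above and that `ResponseError`'s `Display` output contains the message built here:

    use std::str::FromStr;

    use meilisearch_types::tasks::{Kind, Status};

    fn main() {
        // Parsing ignores ASCII case.
        assert_eq!(
            Kind::from_str("documentadditionorupdate").unwrap(),
            Kind::DocumentAdditionOrUpdate
        );
        assert_eq!(Status::from_str("ENQUEUED").unwrap(), Status::Enqueued);

        // Unknown values are rejected with the explanatory message built above.
        let err = Kind::from_str("documentAddition").unwrap_err();
        assert!(err.to_string().contains("is not a type"));
    }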
+
+#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
+pub enum Details {
+    DocumentAdditionOrUpdate { received_documents: u64, indexed_documents: Option<u64> },
+    SettingsUpdate { settings: Box<Settings<Unchecked>> },
+    IndexInfo { primary_key: Option<String> },
+    DocumentDeletion { provided_ids: usize, deleted_documents: Option<u64> },
+    ClearAll { deleted_documents: Option<u64> },
+    TaskCancelation { matched_tasks: u64, canceled_tasks: Option<u64>, original_filter: String },
+    TaskDeletion { matched_tasks: u64, deleted_tasks: Option<u64>, original_filter: String },
+    Dump { dump_uid: Option<String> },
+    IndexSwap { swaps: Vec<IndexSwap> },
+}
+
+impl Details {
+    pub fn to_failed(&self) -> Self {
+        let mut details = self.clone();
+        match &mut details {
+            Self::DocumentAdditionOrUpdate { indexed_documents, .. } => {
+                *indexed_documents = Some(0)
+            }
+            Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
+            Self::ClearAll { deleted_documents } => *deleted_documents = Some(0),
+            Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0),
+            Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0),
+            Self::SettingsUpdate { .. }
+            | Self::IndexInfo { .. }
+            | Self::Dump { .. }
+            | Self::IndexSwap { .. } => (),
+        }
+
+        details
+    }
+}
+
+/// Serialize a `time::Duration` as a best-effort ISO 8601 duration while waiting for
+/// https://github.com/time-rs/time/issues/378.
+/// This code is a port of the old code of the `time` crate that was removed in 0.2.
+pub fn serialize_duration<S: Serializer>(
+    duration: &Option<Duration>,
+    serializer: S,
+) -> Result<S::Ok, S::Error> {
+    match duration {
+        Some(duration) => {
+            // Technically speaking, a negative duration is not valid ISO 8601.
+            if duration.is_negative() {
+                return serializer.serialize_none();
+            }
+
+            const SECS_PER_DAY: i64 = Duration::DAY.whole_seconds();
+            let secs = duration.whole_seconds();
+            let days = secs / SECS_PER_DAY;
+            let secs = secs - days * SECS_PER_DAY;
+            let hasdate = days != 0;
+            let nanos = duration.subsec_nanoseconds();
+            let hastime = (secs != 0 || nanos != 0) || !hasdate;
+
+            // All the following unwraps can't fail.
+            let mut res = String::new();
+            write!(&mut res, "P").unwrap();
+
+            if hasdate {
+                write!(&mut res, "{}D", days).unwrap();
+            }
+
+            const NANOS_PER_MILLI: i32 = Duration::MILLISECOND.subsec_nanoseconds();
+            const NANOS_PER_MICRO: i32 = Duration::MICROSECOND.subsec_nanoseconds();
+
+            if hastime {
+                if nanos == 0 {
+                    write!(&mut res, "T{}S", secs).unwrap();
+                } else if nanos % NANOS_PER_MILLI == 0 {
+                    write!(&mut res, "T{}.{:03}S", secs, nanos / NANOS_PER_MILLI).unwrap();
+                } else if nanos % NANOS_PER_MICRO == 0 {
+                    write!(&mut res, "T{}.{:06}S", secs, nanos / NANOS_PER_MICRO).unwrap();
+                } else {
+                    write!(&mut res, "T{}.{:09}S", secs, nanos).unwrap();
+                }
+            }
+
+            serializer.serialize_str(&res)
+        }
+        None => serializer.serialize_none(),
+    }
+}
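Aside, for illustration only and not part of the patch: the serializer above produces ISO 8601 duration strings such as `PT2.500S` or `P1D`. A sketch of the expected output, assuming a hypothetical `Timing` wrapper struct placed in the same module as `serialize_duration` (otherwise the full path would be needed in the serde attribute) and `serde_json` available as a dependency:

    use serde::Serialize;
    use time::Duration;

    // Hypothetical wrapper used only to exercise the serializer.
    #[derive(Serialize)]
    struct Timing {
        #[serde(serialize_with = "serialize_duration")]
        duration: Option<Duration>,
    }

    fn main() {
        // 2.5 seconds: no date part, milliseconds printed with three digits.
        let t = Timing { duration: Some(Duration::seconds(2) + Duration::milliseconds(500)) };
        assert_eq!(serde_json::to_string(&t).unwrap(), r#"{"duration":"PT2.500S"}"#);

        // Exactly one day: a date part only, no time part.
        let t = Timing { duration: Some(Duration::days(1)) };
        assert_eq!(serde_json::to_string(&t).unwrap(), r#"{"duration":"P1D"}"#);

        // `None` serializes as `null`, and so does a negative duration.
        let t = Timing { duration: None };
        assert_eq!(serde_json::to_string(&t).unwrap(), r#"{"duration":null}"#);
    }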
+
+#[cfg(test)]
+mod tests {
+    use super::Details;
+    use crate::heed::types::SerdeJson;
+    use crate::heed::{BytesDecode, BytesEncode};
+
+    #[test]
+    fn bad_deser() {
+        let details = Details::TaskDeletion {
+            matched_tasks: 1,
+            deleted_tasks: None,
+            original_filter: "hello".to_owned(),
+        };
+        let serialised = SerdeJson::<Details>::bytes_encode(&details).unwrap();
+        let deserialised = SerdeJson::<Details>::bytes_decode(&serialised).unwrap();
+        meili_snap::snapshot!(format!("{:?}", details), @r###"TaskDeletion { matched_tasks: 1, deleted_tasks: None, original_filter: "hello" }"###);
+        meili_snap::snapshot!(format!("{:?}", deserialised), @r###"TaskDeletion { matched_tasks: 1, deleted_tasks: None, original_filter: "hello" }"###);
+    }
+}
diff --git a/meilisearch-types/src/versioning.rs b/meilisearch-types/src/versioning.rs
new file mode 100644
index 000000000..bf1efe1ad
--- /dev/null
+++ b/meilisearch-types/src/versioning.rs
@@ -0,0 +1,61 @@
+use std::fs;
+use std::io::{self, ErrorKind};
+use std::path::Path;
+
+/// The name of the file that contains the version of the database.
+pub const VERSION_FILE_NAME: &str = "VERSION";
+
+static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
+static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
+static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
+
+/// Persists the version of the current Meilisearch binary to a VERSION file.
+pub fn create_version_file(db_path: &Path) -> io::Result<()> {
+    let version_path = db_path.join(VERSION_FILE_NAME);
+    fs::write(version_path, format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH))
+}
+
+/// Ensures the Meilisearch version is compatible with the database, and returns an error on version mismatch.
+pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> {
+    let version_path = db_path.join(VERSION_FILE_NAME);
+
+    match fs::read_to_string(&version_path) {
+        Ok(version) => {
+            let version_components = version.split('.').collect::<Vec<_>>();
+            let (major, minor, patch) = match &version_components[..] {
+                [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()),
+                _ => return Err(VersionFileError::MalformedVersionFile.into()),
+            };
+
+            if major != VERSION_MAJOR || minor != VERSION_MINOR {
+                return Err(VersionFileError::VersionMismatch { major, minor, patch }.into());
+            }
+        }
+        Err(error) => {
+            return match error.kind() {
+                ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()),
+                _ => Err(error.into()),
+            }
+        }
+    }
+
+    Ok(())
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum VersionFileError {
+    #[error(
+        "Meilisearch (v{}) failed to infer the version of the database.
+        To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/advanced/updating.html.",
+        env!("CARGO_PKG_VERSION").to_string()
+    )]
+    MissingVersionFile,
+    #[error("Version file is corrupted and thus Meilisearch is unable to determine the version of the database.")]
+    MalformedVersionFile,
+    #[error(
+        "Expected Meilisearch engine version: {major}.{minor}.{patch}, current engine version: {}.
+        To update Meilisearch please follow our guide on https://docs.meilisearch.com/learn/advanced/updating.html.",
+        env!("CARGO_PKG_VERSION").to_string()
+    )]
+    VersionMismatch { major: String, minor: String, patch: String },
+}
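Aside, for illustration only and not part of the patch: the two functions above are meant to be called around database start-up. A minimal sketch, assuming the file is exposed as `meilisearch_types::versioning` and using a made-up database path:

    use std::path::Path;

    use meilisearch_types::versioning::{check_version_file, create_version_file};

    fn main() -> anyhow::Result<()> {
        let db_path = Path::new("./data.ms"); // made-up path for the example
        std::fs::create_dir_all(db_path)?;

        // On a fresh database, persist the version of the running binary...
        create_version_file(db_path)?;

        // ...and on every later start-up, refuse to open a database written by a
        // different major or minor version.
        check_version_file(db_path)?;
        Ok(())
    }

Note that `check_version_file` only compares the major and minor components, so a patch-only difference is accepted.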
diff --git a/permissive-json-pointer/Cargo.toml b/permissive-json-pointer/Cargo.toml
index 21fad5f53..b036a0c88 100644
--- a/permissive-json-pointer/Cargo.toml
+++ b/permissive-json-pointer/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "permissive-json-pointer"
-version = "0.29.2"
+version = "0.30.0"
 edition = "2021"
 description = "A permissive json pointer"
 readme = "README.md"
diff --git a/permissive-json-pointer/src/lib.rs b/permissive-json-pointer/src/lib.rs
index 52f181980..039bd3320 100644
--- a/permissive-json-pointer/src/lib.rs
+++ b/permissive-json-pointer/src/lib.rs
@@ -25,11 +25,7 @@ const SPLIT_SYMBOL: char = '.';
 /// ```
 fn contained_in(selector: &str, key: &str) -> bool {
     selector.starts_with(key)
-        && selector[key.len()..]
-            .chars()
-            .next()
-            .map(|c| c == SPLIT_SYMBOL)
-            .unwrap_or(true)
+        && selector[key.len()..].chars().next().map(|c| c == SPLIT_SYMBOL).unwrap_or(true)
 }
 
 /// Map the selected leaf values of a json allowing you to update only the fields that were selected.
@@ -244,10 +240,7 @@ mod tests {
     fn test_contained_in() {
         assert!(contained_in("animaux", "animaux"));
         assert!(contained_in("animaux.chien", "animaux"));
-        assert!(contained_in(
-            "animaux.chien.race.bouvier bernois.fourrure.couleur",
-            "animaux"
-        ));
+        assert!(contained_in("animaux.chien.race.bouvier bernois.fourrure.couleur", "animaux"));
         assert!(contained_in(
             "animaux.chien.race.bouvier bernois.fourrure.couleur",
             "animaux.chien"
@@ -726,14 +719,12 @@ mod tests {
             }
         });
 
-        map_leaf_values(
-            value.as_object_mut().unwrap(),
-            ["jean.race.name"],
-            |key, value| match (value, key) {
+        map_leaf_values(value.as_object_mut().unwrap(), ["jean.race.name"], |key, value| {
+            match (value, key) {
                 (Value::String(name), "jean.race.name") => *name = S("patou"),
                 _ => unreachable!(),
-            },
-        );
+            }
+        });
 
         assert_eq!(
             value,