From fa4d8b834892dde62c48b65930fa6655c728b020 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Mar 2023 17:57:57 +0000 Subject: [PATCH 01/22] Bump Swatinem/rust-cache from 2.2.0 to 2.2.1 Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.2.0 to 2.2.1. - [Release notes](https://github.com/Swatinem/rust-cache/releases) - [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md) - [Commits](https://github.com/Swatinem/rust-cache/compare/v2.2.0...v2.2.1) --- updated-dependencies: - dependency-name: Swatinem/rust-cache dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/uffizzi-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/uffizzi-build.yml b/.github/workflows/uffizzi-build.yml index 934d91522..922a8b533 100644 --- a/.github/workflows/uffizzi-build.yml +++ b/.github/workflows/uffizzi-build.yml @@ -23,7 +23,7 @@ jobs: target: x86_64-unknown-linux-musl - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.0 + uses: Swatinem/rust-cache@v2.2.1 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 From 3a0314f9deb6b683c9b680b3462fd89b112ed447 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Mar 2023 17:58:05 +0000 Subject: [PATCH 02/22] Bump svenstaro/upload-release-action from 2.4.0 to 2.5.0 Bumps [svenstaro/upload-release-action](https://github.com/svenstaro/upload-release-action) from 2.4.0 to 2.5.0. 
- [Release notes](https://github.com/svenstaro/upload-release-action/releases) - [Changelog](https://github.com/svenstaro/upload-release-action/blob/master/CHANGELOG.md) - [Commits](https://github.com/svenstaro/upload-release-action/compare/2.4.0...2.5.0) --- updated-dependencies: - dependency-name: svenstaro/upload-release-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/publish-binaries.yml | 8 ++++---- .github/workflows/publish-deb-brew-pkg.yml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 13555cbac..151b522fd 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -54,7 +54,7 @@ jobs: # No need to upload binaries for dry run (cron) - name: Upload binaries to release if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.4.0 + uses: svenstaro/upload-release-action@2.5.0 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/meilisearch @@ -87,7 +87,7 @@ jobs: # No need to upload binaries for dry run (cron) - name: Upload binaries to release if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.4.0 + uses: svenstaro/upload-release-action@2.5.0 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/${{ matrix.artifact_name }} @@ -123,7 +123,7 @@ jobs: - name: Upload the binary to release # No need to upload binaries for dry run (cron) if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.4.0 + uses: svenstaro/upload-release-action@2.5.0 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch @@ -183,7 +183,7 @@ jobs: - name: Upload the binary to release # No need to upload binaries for dry run (cron) if: github.event_name == 'release' - uses: svenstaro/upload-release-action@2.4.0 
+ uses: svenstaro/upload-release-action@2.5.0 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch diff --git a/.github/workflows/publish-deb-brew-pkg.yml b/.github/workflows/publish-deb-brew-pkg.yml index 13b08d071..a382df3ed 100644 --- a/.github/workflows/publish-deb-brew-pkg.yml +++ b/.github/workflows/publish-deb-brew-pkg.yml @@ -35,7 +35,7 @@ jobs: - name: Build deb package run: cargo deb -p meilisearch -o target/debian/meilisearch.deb - name: Upload debian pkg to release - uses: svenstaro/upload-release-action@2.4.0 + uses: svenstaro/upload-release-action@2.5.0 with: repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/debian/meilisearch.deb From bcd3f6054a93985a9a0d8010f9d8d83c2024f652 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Mar 2023 17:58:11 +0000 Subject: [PATCH 03/22] Bump docker/build-push-action from 3 to 4 Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 3 to 4. - [Release notes](https://github.com/docker/build-push-action/releases) - [Commits](https://github.com/docker/build-push-action/compare/v3...v4) --- updated-dependencies: - dependency-name: docker/build-push-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/uffizzi-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/uffizzi-build.yml b/.github/workflows/uffizzi-build.yml index 934d91522..f285b03ee 100644 --- a/.github/workflows/uffizzi-build.yml +++ b/.github/workflows/uffizzi-build.yml @@ -53,7 +53,7 @@ jobs: type=raw,value=60d - name: Build Image - uses: docker/build-push-action@v3 + uses: docker/build-push-action@v4 with: context: ./ file: .github/uffizzi/Dockerfile From c0d8eb295d208502c010f4f11a7e8764e4372c2e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Mar 2023 17:58:18 +0000 Subject: [PATCH 04/22] Bump docker/metadata-action from 3 to 4 Bumps [docker/metadata-action](https://github.com/docker/metadata-action) from 3 to 4. - [Release notes](https://github.com/docker/metadata-action/releases) - [Upgrade guide](https://github.com/docker/metadata-action/blob/master/UPGRADE.md) - [Commits](https://github.com/docker/metadata-action/compare/v3...v4) --- updated-dependencies: - dependency-name: docker/metadata-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] --- .github/workflows/uffizzi-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/uffizzi-build.yml b/.github/workflows/uffizzi-build.yml index 934d91522..94dc0ee76 100644 --- a/.github/workflows/uffizzi-build.yml +++ b/.github/workflows/uffizzi-build.yml @@ -46,7 +46,7 @@ jobs: - name: Docker metadata id: meta - uses: docker/metadata-action@v3 + uses: docker/metadata-action@v4 with: images: registry.uffizzi.com/${{ env.UUID_TAG }} tags: | From 76cf1bff87507fc3f73ba5d8c4308d348c609aab Mon Sep 17 00:00:00 2001 From: James Lucktaylor Date: Sat, 18 Feb 2023 14:30:44 +0000 Subject: [PATCH 05/22] Add scheduled test to Actions for all features Add a new job to the Rust workflow to run 'cargo build' and 'cargo test' (on the cron schedule only) with the '--all-features' flag. This will execute across all three environments: Linux, macOS, Windows. Autoformat the Rust workflow file via the Red Hat YAML extension for Visual Studio Code: https://marketplace.visualstudio.com/items?itemName=redhat.vscode-yaml This straightens out whitespace and string quoting for safer parsing. Fixes #3506. 
--- .github/workflows/rust.yml | 115 +++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 44 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 5f783ca9e..f7f1be79c 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: schedule: # Everyday at 5:00am - - cron: '0 5 * * *' + - cron: "0 5 * * *" pull_request: push: # trying and staging branches are for Bors config @@ -25,36 +25,36 @@ jobs: # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations image: ubuntu:18.04 steps: - - uses: actions/checkout@v3 - - name: Install needed dependencies - run: | - apt-get update && apt-get install -y curl - apt-get install build-essential -y - - name: Run test with Rust stable - if: github.event_name != 'schedule' - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true - - name: Run test with Rust nightly - if: github.event_name == 'schedule' - uses: actions-rs/toolchain@v1 - with: - toolchain: nightly - override: true - # Disable cache due to disk space issues with Windows workers in CI - # - name: Cache dependencies - # uses: Swatinem/rust-cache@v2.2.0 - - name: Run cargo check without any default features - uses: actions-rs/cargo@v1 - with: - command: build - args: --locked --release --no-default-features --all - - name: Run cargo test - uses: actions-rs/cargo@v1 - with: - command: test - args: --locked --release --all + - uses: actions/checkout@v3 + - name: Install needed dependencies + run: | + apt-get update && apt-get install -y curl + apt-get install build-essential -y + - name: Run test with Rust stable + if: github.event_name != 'schedule' + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + - name: Run test with Rust nightly + if: github.event_name == 'schedule' + uses: actions-rs/toolchain@v1 + with: + toolchain: nightly + override: true + # Disable cache due to disk space issues with 
Windows workers in CI + # - name: Cache dependencies + # uses: Swatinem/rust-cache@v2.2.0 + - name: Run cargo check without any default features + uses: actions-rs/cargo@v1 + with: + command: build + args: --locked --release --no-default-features --all + - name: Run cargo test + uses: actions-rs/cargo@v1 + with: + command: test + args: --locked --release --all test-others: name: Tests on ${{ matrix.os }} @@ -64,19 +64,46 @@ jobs: matrix: os: [macos-12, windows-2022] steps: - - uses: actions/checkout@v3 -# - name: Cache dependencies -# uses: Swatinem/rust-cache@v2.2.0 - - name: Run cargo check without any default features - uses: actions-rs/cargo@v1 - with: - command: build - args: --locked --release --no-default-features --all - - name: Run cargo test - uses: actions-rs/cargo@v1 - with: - command: test - args: --locked --release --all + - uses: actions/checkout@v3 + # - name: Cache dependencies + # uses: Swatinem/rust-cache@v2.2.0 + - name: Run cargo check without any default features + uses: actions-rs/cargo@v1 + with: + command: build + args: --locked --release --no-default-features --all + - name: Run cargo test + uses: actions-rs/cargo@v1 + with: + command: test + args: --locked --release --all + + test-all-features: + name: Tests all features on ${{ matrix.os }} on cron schedule only + + runs-on: ${{ matrix.os }} + + strategy: + fail-fast: false + matrix: + os: [ubuntu-18.04, macos-12, windows-2022] + + if: github.event_name == 'schedule' + + steps: + - uses: actions/checkout@v3 + + - name: Run cargo build with all features + uses: actions-rs/cargo@v1 + with: + command: build + args: --workspace --locked --release --all-features + + - name: Run cargo test with all features + uses: actions-rs/cargo@v1 + with: + command: test + args: --workspace --locked --release --all-features # We run tests in debug also, to make sure that the debug_assertions are hit test-debug: From 2dd948a4a103c097cb866a9f90504f285bf5837b Mon Sep 17 00:00:00 2001 From: James Lucktaylor 
Date: Fri, 3 Mar 2023 12:07:42 +0000 Subject: [PATCH 06/22] ci(actions/rust): align with test-linux job --- .github/workflows/rust.yml | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f7f1be79c..9ed68f5aa 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -79,15 +79,11 @@ jobs: args: --locked --release --all test-all-features: - name: Tests all features on ${{ matrix.os }} on cron schedule only - - runs-on: ${{ matrix.os }} - - strategy: - fail-fast: false - matrix: - os: [ubuntu-18.04, macos-12, windows-2022] - + name: Tests all features on cron schedule only + runs-on: ubuntu-latest + container: + # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations + image: ubuntu:18.04 if: github.event_name == 'schedule' steps: From a9e17ab8c6637b8e75fd5551aec47536f47544f7 Mon Sep 17 00:00:00 2001 From: James Lucktaylor Date: Fri, 3 Mar 2023 12:08:30 +0000 Subject: [PATCH 07/22] style(actions/rust): resolve PR review --- .github/workflows/rust.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 9ed68f5aa..4a58b9ccf 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -4,7 +4,7 @@ on: workflow_dispatch: schedule: # Everyday at 5:00am - - cron: "0 5 * * *" + - cron: '0 5 * * *' pull_request: push: # trying and staging branches are for Bors config @@ -85,16 +85,13 @@ jobs: # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations image: ubuntu:18.04 if: github.event_name == 'schedule' - steps: - uses: actions/checkout@v3 - - name: Run cargo build with all features uses: actions-rs/cargo@v1 with: command: build args: --workspace --locked --release --all-features - - name: Run cargo test with all features uses: actions-rs/cargo@v1 with: From 22219fd88ffcfceee47779638f2382305af8dc9a Mon Sep 17 00:00:00 
2001 From: James Lucktaylor Date: Mon, 6 Mar 2023 12:08:32 +0000 Subject: [PATCH 08/22] ci(actions/rust): explicitly set up dependencies and toolchain override --- .github/workflows/rust.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 4a58b9ccf..14417f25c 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -87,6 +87,14 @@ jobs: if: github.event_name == 'schedule' steps: - uses: actions/checkout@v3 + - name: Install needed dependencies + run: | + apt-get update + apt-get install --assume-yes build-essential curl + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true - name: Run cargo build with all features uses: actions-rs/cargo@v1 with: From febc8d1b5234cab39e039d1c61d706d5987ed65a Mon Sep 17 00:00:00 2001 From: curquiza Date: Tue, 7 Mar 2023 14:55:09 +0100 Subject: [PATCH 09/22] Clean CI file names --- ...l_benchmarks.yml => benchmarks-manual.yml} | 2 +- ...exing.yml => benchmarks-push-indexing.yml} | 2 +- ...geo.yml => benchmarks-push-search-geo.yml} | 2 +- ...s.yml => benchmarks-push-search-songs.yml} | 2 +- ...ki.yml => benchmarks-push-search-wiki.yml} | 2 +- .../workflows/create-issue-dependencies.yml | 28 ------------------- .github/workflows/dependency-issue.yml | 24 ++++++++++++++++ .../workflows/{flaky.yml => flaky-tests.yml} | 0 ...-brew-pkg.yml => publish-apt-brew-pkg.yml} | 2 +- .github/workflows/publish-binaries.yml | 4 +-- .github/workflows/publish-docker-images.yml | 5 ++-- .../workflows/{rust.yml => test-suite.yml} | 2 +- 12 files changed, 35 insertions(+), 40 deletions(-) rename .github/workflows/{manual_benchmarks.yml => benchmarks-manual.yml} (99%) rename .github/workflows/{push_benchmarks_indexing.yml => benchmarks-push-indexing.yml} (98%) rename .github/workflows/{push_benchmarks_search_geo.yml => benchmarks-push-search-geo.yml} (98%) rename .github/workflows/{push_benchmarks_search_songs.yml => 
benchmarks-push-search-songs.yml} (98%) rename .github/workflows/{push_benchmarks_search_wiki.yml => benchmarks-push-search-wiki.yml} (98%) delete mode 100644 .github/workflows/create-issue-dependencies.yml create mode 100644 .github/workflows/dependency-issue.yml rename .github/workflows/{flaky.yml => flaky-tests.yml} (100%) rename .github/workflows/{publish-deb-brew-pkg.yml => publish-apt-brew-pkg.yml} (97%) rename .github/workflows/{rust.yml => test-suite.yml} (99%) diff --git a/.github/workflows/manual_benchmarks.yml b/.github/workflows/benchmarks-manual.yml similarity index 99% rename from .github/workflows/manual_benchmarks.yml rename to .github/workflows/benchmarks-manual.yml index 76c6fe0fe..44793fc17 100644 --- a/.github/workflows/manual_benchmarks.yml +++ b/.github/workflows/benchmarks-manual.yml @@ -1,4 +1,4 @@ -name: Benchmarks +name: Benchmarks (manual) on: workflow_dispatch: diff --git a/.github/workflows/push_benchmarks_indexing.yml b/.github/workflows/benchmarks-push-indexing.yml similarity index 98% rename from .github/workflows/push_benchmarks_indexing.yml rename to .github/workflows/benchmarks-push-indexing.yml index 12f9f6eda..a966570e6 100644 --- a/.github/workflows/push_benchmarks_indexing.yml +++ b/.github/workflows/benchmarks-push-indexing.yml @@ -1,4 +1,4 @@ -name: Benchmarks indexing (push) +name: Benchmarks of indexing (push) on: push: diff --git a/.github/workflows/push_benchmarks_search_geo.yml b/.github/workflows/benchmarks-push-search-geo.yml similarity index 98% rename from .github/workflows/push_benchmarks_search_geo.yml rename to .github/workflows/benchmarks-push-search-geo.yml index 02661061f..1b5cacfd1 100644 --- a/.github/workflows/push_benchmarks_search_geo.yml +++ b/.github/workflows/benchmarks-push-search-geo.yml @@ -1,4 +1,4 @@ -name: Benchmarks search geo (push) +name: Benchmarks of search for geo (push) on: push: diff --git a/.github/workflows/push_benchmarks_search_songs.yml 
b/.github/workflows/benchmarks-push-search-songs.yml similarity index 98% rename from .github/workflows/push_benchmarks_search_songs.yml rename to .github/workflows/benchmarks-push-search-songs.yml index 92684a907..02cd10472 100644 --- a/.github/workflows/push_benchmarks_search_songs.yml +++ b/.github/workflows/benchmarks-push-search-songs.yml @@ -1,4 +1,4 @@ -name: Benchmarks search songs (push) +name: Benchmarks of search for songs (push) on: push: diff --git a/.github/workflows/push_benchmarks_search_wiki.yml b/.github/workflows/benchmarks-push-search-wiki.yml similarity index 98% rename from .github/workflows/push_benchmarks_search_wiki.yml rename to .github/workflows/benchmarks-push-search-wiki.yml index 0f6511337..455aaa95d 100644 --- a/.github/workflows/push_benchmarks_search_wiki.yml +++ b/.github/workflows/benchmarks-push-search-wiki.yml @@ -1,4 +1,4 @@ -name: Benchmarks search wikipedia articles (push) +name: Benchmarks of search for Wikipedia articles (push) on: push: diff --git a/.github/workflows/create-issue-dependencies.yml b/.github/workflows/create-issue-dependencies.yml deleted file mode 100644 index 3ad1be910..000000000 --- a/.github/workflows/create-issue-dependencies.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Create issue to upgrade dependencies -on: - schedule: - # Run the first of the month, every 3 month - - cron: '0 0 1 */3 *' - workflow_dispatch: - -jobs: - create-issue: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Create an issue - uses: actions-ecosystem/action-create-issue@v1 - with: - github_token: ${{ secrets.MEILI_BOT_GH_PAT }} - title: Upgrade dependencies - body: | - This issue is about updating Meilisearch dependencies: - - [ ] Cargo toml dependencies of Meilisearch; but also the main engine-team repositories that Meilisearch depends on (charabia, heed...) 
- - [ ] If new Rust versions have been released, update the Rust version in the Clippy job of this [GitHub Action file](./.github/workflows/rust.yml) - - ⚠️ To avoid last minute bugs, this issue should only be done at the beginning of the sprint! - - The GitHub action dependencies are managed by [Dependabot](./.github/dependabot.yml) - labels: | - dependencies - maintenance diff --git a/.github/workflows/dependency-issue.yml b/.github/workflows/dependency-issue.yml new file mode 100644 index 000000000..941cc4e53 --- /dev/null +++ b/.github/workflows/dependency-issue.yml @@ -0,0 +1,24 @@ +name: Create issue to upgrade dependencies + +on: + schedule: + # Run the first of the month, every 3 month + - cron: '0 0 1 */3 *' + workflow_dispatch: + +jobs: + create-issue: + runs-on: ubuntu-latest + env: + ISSUE_TEMPLATE: issue-template.md + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + steps: + - uses: actions/checkout@v3 + - name: Download the issue template + run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE + - name: Create issue + run: | + gh issue create \ + --title 'Upgrade dependencies' \ + --label 'dependencies,maintenance' \ + --body-file $ISSUE_TEMPLATE diff --git a/.github/workflows/flaky.yml b/.github/workflows/flaky-tests.yml similarity index 100% rename from .github/workflows/flaky.yml rename to .github/workflows/flaky-tests.yml diff --git a/.github/workflows/publish-deb-brew-pkg.yml b/.github/workflows/publish-apt-brew-pkg.yml similarity index 97% rename from .github/workflows/publish-deb-brew-pkg.yml rename to .github/workflows/publish-apt-brew-pkg.yml index a382df3ed..e24d8ccf1 100644 --- a/.github/workflows/publish-deb-brew-pkg.yml +++ b/.github/workflows/publish-apt-brew-pkg.yml @@ -1,4 +1,4 @@ -name: Publish to APT repository & Homebrew +name: Publish to APT & Homebrew on: release: diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 
151b522fd..02253a375 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -1,3 +1,5 @@ +name: Publish binaries to GitHub release + on: workflow_dispatch: schedule: @@ -5,8 +7,6 @@ on: release: types: [published] -name: Publish binaries to release - jobs: check-version: name: Check the version validity diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml index 39bab4d0d..9ceeaaaa4 100644 --- a/.github/workflows/publish-docker-images.yml +++ b/.github/workflows/publish-docker-images.yml @@ -1,4 +1,5 @@ ---- +name: Publish images to Docker Hub + on: push: # Will run for every tag pushed except `latest` @@ -12,8 +13,6 @@ on: - cron: '0 23 * * *' # Every day at 11:00pm workflow_dispatch: -name: Publish tagged images to Docker Hub - jobs: docker: runs-on: docker diff --git a/.github/workflows/rust.yml b/.github/workflows/test-suite.yml similarity index 99% rename from .github/workflows/rust.yml rename to .github/workflows/test-suite.yml index 14417f25c..69deade9d 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/test-suite.yml @@ -1,4 +1,4 @@ -name: Rust +name: Test suite on: workflow_dispatch: From c5f22be6e1931c46f6cc6f56c799c9a4e68c789b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 9 Mar 2023 11:12:49 +0100 Subject: [PATCH 10/22] add boolean support for csv documents --- meilisearch/tests/documents/add_documents.rs | 109 +++++++++++++++++++ milli/src/documents/builder.rs | 28 ++++- milli/src/documents/mod.rs | 17 +++ 3 files changed, 151 insertions(+), 3 deletions(-) diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 612a2cdb6..164d68582 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -279,6 +279,81 @@ async fn add_csv_document() { "###); } +#[actix_rt::test] +async fn add_csv_document_with_types() { + let server = Server::new().await; 
+ let index = server.index("pets"); + + let document = "#id:number,name:string,race:string,age:number,cute:boolean +0,jean,bernese mountain,2.5,true +1,,,, +2,lilou,pug,-2,false"; + + let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"202 Accepted"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "taskUid": 0, + "indexUid": "pets", + "status": "enqueued", + "type": "documentAdditionOrUpdate", + "enqueuedAt": "[date]" + } + "###); + let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await; + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###" + { + "uid": 0, + "indexUid": "pets", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 3, + "indexedDocuments": 3 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "#id": 0, + "name": "jean", + "race": "bernese mountain", + "age": 2.5, + "cute": true + }, + { + "#id": 1, + "name": null, + "race": null, + "age": null, + "cute": null + }, + { + "#id": 2, + "name": "lilou", + "race": "pug", + "age": -2, + "cute": false + } + ], + "offset": 0, + "limit": 20, + "total": 3 + } + "###); +} + #[actix_rt::test] async fn add_csv_document_with_custom_delimiter() { let server = Server::new().await; @@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() { "###); } +#[actix_rt::test] +async fn add_csv_document_with_types_error() { + let server = Server::new().await; + let index = server.index("pets"); + + let document = "#id:number,a:boolean,b:number +0,doggo,1"; + + 
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.", + "code": "malformed_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#malformed_payload" + } + "###); + + let document = "#id:number,a:boolean,b:number +0,true,doggo"; + + let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###" + { + "message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.", + "code": "malformed_payload", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#malformed_payload" + } + "###); +} + /// any other content-type is must be refused #[actix_rt::test] async fn error_add_documents_test_bad_content_types() { diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs index 1fa59168e..ace9340d7 100644 --- a/milli/src/documents/builder.rs +++ b/milli/src/documents/builder.rs @@ -116,12 +116,13 @@ impl DocumentsBatchBuilder { let value = &record[*i]; match type_ { AllowedType::Number => { - if value.trim().is_empty() { + let trimmed_value = value.trim(); + if trimmed_value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; - } else if let Ok(integer) = value.trim().parse::() { + } else if let Ok(integer) = trimmed_value.parse::() { to_writer(&mut self.value_buffer, &integer)?; } else { - match value.trim().parse::() { + match trimmed_value.parse::() { Ok(float) => { to_writer(&mut self.value_buffer, &float)?; } @@ -135,6 +136,25 @@ impl DocumentsBatchBuilder { } } } + AllowedType::Boolean => { + let 
trimmed_value = value.trim(); + if trimmed_value.is_empty() { + to_writer(&mut self.value_buffer, &Value::Null)?; + } else { + match trimmed_value.parse::<bool>() { + Ok(bool) => { + to_writer(&mut self.value_buffer, &bool)?; + } + Err(error) => { + return Err(Error::ParseBool { + error, + line, + value: value.to_string(), + }); + } + } + } + } AllowedType::String => { if value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; @@ -173,6 +193,7 @@ impl DocumentsBatchBuilder { #[derive(Debug)] enum AllowedType { String, + Boolean, Number, } @@ -181,6 +202,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) { match header.rsplit_once(':') { Some((field_name, field_type)) => match field_type { "string" => (field_name, AllowedType::String), + "boolean" => (field_name, AllowedType::Boolean), "number" => (field_name, AllowedType::Number), // if the pattern isn't reconized, we keep the whole field. _otherwise => (header, AllowedType::String), diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index da3a07942..67b99db9a 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -90,6 +90,7 @@ impl DocumentsBatchIndex { #[derive(Debug)] pub enum Error { ParseFloat { error: std::num::ParseFloatError, line: usize, value: String }, + ParseBool { error: std::str::ParseBoolError, line: usize, value: String }, InvalidDocumentFormat, InvalidEnrichedData, InvalidUtf8(Utf8Error), @@ -136,6 +137,9 @@ impl fmt::Display for Error { Error::ParseFloat { error, line, value } => { write!(f, "Error parsing number {:?} at line {}: {}", value, line, error) } + Error::ParseBool { error, line, value } => { + write!(f, "Error parsing boolean {:?} at line {}: {}", value, line, error) + } Error::InvalidDocumentFormat => { f.write_str("Invalid document addition format, missing the documents batch index.") } @@ -274,6 +278,19 @@ mod test { ]); } + #[test] + fn csv_types_dont_panic() { + let csv1_content = +
"id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5"; + let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content)); + + let mut builder = DocumentsBatchBuilder::new(Vec::new()); + builder.append_csv(csv1).unwrap(); + let vector = builder.into_inner().unwrap(); + + DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap(); + } + #[test] fn out_of_order_csv_fields() { let csv1_content = "id:number,b\n1,0"; From eddefb0e0fcb17ba45e23d581443a9f52926e010 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 9 Mar 2023 11:23:57 +0100 Subject: [PATCH 11/22] refactor the error type of the milli::document thing silence a warning --- milli/src/documents/mod.rs | 79 ++++++++------------------------------ 1 file changed, 17 insertions(+), 62 deletions(-) diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs index 67b99db9a..43b31187d 100644 --- a/milli/src/documents/mod.rs +++ b/milli/src/documents/mod.rs @@ -3,7 +3,7 @@ mod enriched; mod reader; mod serde_impl; -use std::fmt::{self, Debug}; +use std::fmt::Debug; use std::io; use std::str::Utf8Error; @@ -87,75 +87,30 @@ impl DocumentsBatchIndex { } } -#[derive(Debug)] +#[derive(Debug, thiserror::Error)] pub enum Error { + #[error("Error parsing number {value:?} at line {line}: {error}")] ParseFloat { error: std::num::ParseFloatError, line: usize, value: String }, + #[error("Error parsing boolean {value:?} at line {line}: {error}")] ParseBool { error: std::str::ParseBoolError, line: usize, value: String }, + #[error("Invalid document addition format, missing the documents batch index.")] InvalidDocumentFormat, + #[error("Invalid enriched data.")] InvalidEnrichedData, - InvalidUtf8(Utf8Error), - Csv(csv::Error), - Json(serde_json::Error), + #[error(transparent)] + InvalidUtf8(#[from] Utf8Error), + #[error(transparent)] + Csv(#[from] csv::Error), + #[error(transparent)] + Json(#[from] serde_json::Error), + #[error(transparent)] Serialize(serde_json::Error), 
- Grenad(grenad::Error), - Io(io::Error), + #[error(transparent)] + Grenad(#[from] grenad::Error), + #[error(transparent)] + Io(#[from] io::Error), } -impl From<csv::Error> for Error { - fn from(e: csv::Error) -> Self { - Self::Csv(e) - } -} - -impl From<io::Error> for Error { - fn from(other: io::Error) -> Self { - Self::Io(other) - } -} - -impl From<serde_json::Error> for Error { - fn from(other: serde_json::Error) -> Self { - Self::Json(other) - } -} - -impl From<grenad::Error> for Error { - fn from(other: grenad::Error) -> Self { - Self::Grenad(other) - } -} - -impl From<Utf8Error> for Error { - fn from(other: Utf8Error) -> Self { - Self::InvalidUtf8(other) - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Error::ParseFloat { error, line, value } => { - write!(f, "Error parsing number {:?} at line {}: {}", value, line, error) - } - Error::ParseBool { error, line, value } => { - write!(f, "Error parsing boolean {:?} at line {}: {}", value, line, error) - } - Error::InvalidDocumentFormat => { - f.write_str("Invalid document addition format, missing the documents batch index.") - } - Error::InvalidEnrichedData => f.write_str("Invalid enriched data."), - Error::InvalidUtf8(e) => write!(f, "{}", e), - Error::Io(e) => write!(f, "{}", e), - Error::Serialize(e) => write!(f, "{}", e), - Error::Grenad(e) => write!(f, "{}", e), - Error::Csv(e) => write!(f, "{}", e), - Error::Json(e) => write!(f, "{}", e), - } - } -} - -impl std::error::Error for Error {} - #[cfg(test)] pub fn objects_from_json_value(json: serde_json::Value) -> Vec { let documents = match json { From f45daf80315c7cef2b09c62f0b964ec1fcb2ca32 Mon Sep 17 00:00:00 2001 From: curquiza Date: Mon, 13 Mar 2023 14:24:15 +0100 Subject: [PATCH 12/22] Enable cache again in test suite CI --- .github/workflows/test-suite.yml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 69deade9d..e08b77e3d 100644 ---
a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -42,9 +42,8 @@ jobs: with: toolchain: nightly override: true - # Disable cache due to disk space issues with Windows workers in CI - # - name: Cache dependencies - # uses: Swatinem/rust-cache@v2.2.0 + - name: Cache dependencies + uses: Swatinem/rust-cache@v2.2.0 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -65,8 +64,8 @@ jobs: os: [macos-12, windows-2022] steps: - uses: actions/checkout@v3 - # - name: Cache dependencies - # uses: Swatinem/rust-cache@v2.2.0 + - name: Cache dependencies + uses: Swatinem/rust-cache@v2.2.0 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -123,8 +122,8 @@ jobs: with: toolchain: stable override: true - # - name: Cache dependencies - # uses: Swatinem/rust-cache@v2.2.0 + - name: Cache dependencies + uses: Swatinem/rust-cache@v2.2.0 - name: Run tests in debug uses: actions-rs/cargo@v1 with: @@ -142,8 +141,8 @@ jobs: toolchain: 1.67.0 override: true components: clippy - # - name: Cache dependencies - # uses: Swatinem/rust-cache@v2.2.0 + - name: Cache dependencies + uses: Swatinem/rust-cache@v2.2.0 - name: Run cargo clippy uses: actions-rs/cargo@v1 with: @@ -162,8 +161,8 @@ jobs: toolchain: nightly override: true components: rustfmt - # - name: Cache dependencies - # uses: Swatinem/rust-cache@v2.2.0 + - name: Cache dependencies + uses: Swatinem/rust-cache@v2.2.0 - name: Run cargo fmt # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file. 
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate From 0f33a65468b94a43e8eebd1582e2a3263727791a Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 13 Mar 2023 16:51:11 +0100 Subject: [PATCH 13/22] makes kero happy --- milli/src/documents/builder.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs index ace9340d7..e5124f67f 100644 --- a/milli/src/documents/builder.rs +++ b/milli/src/documents/builder.rs @@ -114,9 +114,9 @@ impl DocumentsBatchBuilder { self.value_buffer.clear(); let value = &record[*i]; + let trimmed_value = value.trim(); match type_ { AllowedType::Number => { - let trimmed_value = value.trim(); if trimmed_value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; } else if let Ok(integer) = trimmed_value.parse::() { @@ -137,7 +137,6 @@ impl DocumentsBatchBuilder { } } AllowedType::Boolean => { - let trimmed_value = value.trim(); if trimmed_value.is_empty() { to_writer(&mut self.value_buffer, &Value::Null)?; } else { From e7994cdeb3f2660b959d8b8237643d6db6f26143 Mon Sep 17 00:00:00 2001 From: Gregory Conrad Date: Sun, 26 Mar 2023 12:18:39 -0400 Subject: [PATCH 14/22] feat: check to see if the PK changed before erroring out Previously, if the primary key was set and a Settings update contained a primary key, an error would be returned. However, this error is not needed if the new PK == the current PK. This commit just checks to see if the PK actually changes before raising an error. 
--- milli/src/update/settings.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 4f4fa25d6..3e271924b 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -565,8 +565,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.index.put_primary_key(self.wtxn, primary_key)?; Ok(()) } else { - let primary_key = self.index.primary_key(self.wtxn)?.unwrap(); - Err(UserError::PrimaryKeyCannotBeChanged(primary_key.to_string()).into()) + let curr_primary_key = self.index.primary_key(self.wtxn)?.unwrap().to_string(); + if primary_key == &curr_primary_key { + Ok(()) + } else { + Err(UserError::PrimaryKeyCannotBeChanged(curr_primary_key).into()) + } } } Setting::Reset => { @@ -1332,6 +1336,17 @@ mod tests { .unwrap(); wtxn.commit().unwrap(); + // Updating settings with the same primary key should do nothing + let mut wtxn = index.write_txn().unwrap(); + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_primary_key(S("mykey")); + }) + .unwrap(); + assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); + wtxn.commit().unwrap(); + + // Updating the settings with a different (or no) primary key causes an error let mut wtxn = index.write_txn().unwrap(); let error = index .update_settings_using_wtxn(&mut wtxn, |settings| { From cf5145b5421d8ff3ef625e53e82b33fc77012d2f Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 29 Mar 2023 14:27:40 +0200 Subject: [PATCH 15/22] Reduce the time to import a dump With this commit, for a dump containing 1M tasks we went from 3m36s to import the task queue down to 1m02s --- index-scheduler/src/lib.rs | 241 ++++++++++++++++++++----------------- meilisearch/src/lib.rs | 6 +- 2 files changed, 135 insertions(+), 112 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index e23e4ff8b..3e7c85148 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs 
@@ -43,7 +43,7 @@ pub use error::Error; use file_store::FileStore; use meilisearch_types::error::ResponseError; use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; -use meilisearch_types::heed::{self, Database, Env, RoTxn}; +use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn}; use meilisearch_types::milli; use meilisearch_types::milli::documents::DocumentsBatchBuilder; use meilisearch_types::milli::update::IndexerConfig; @@ -883,115 +883,8 @@ impl IndexScheduler { /// Register a new task coming from a dump in the scheduler. /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. - pub fn register_dumped_task( - &mut self, - task: TaskDump, - content_file: Option>, - ) -> Result { - // Currently we don't need to access the tasks queue while loading a dump thus I can block everything. - let mut wtxn = self.env.write_txn()?; - - let content_uuid = match content_file { - Some(content_file) if task.status == Status::Enqueued => { - let (uuid, mut file) = self.create_update_file()?; - let mut builder = DocumentsBatchBuilder::new(file.as_file_mut()); - for doc in content_file { - builder.append_json_object(&doc?)?; - } - builder.into_inner()?; - file.persist()?; - - Some(uuid) - } - // If the task isn't `Enqueued` then just generate a recognisable `Uuid` - // in case we try to open it later. 
- _ if task.status != Status::Enqueued => Some(Uuid::nil()), - _ => None, - }; - - let task = Task { - uid: task.uid, - enqueued_at: task.enqueued_at, - started_at: task.started_at, - finished_at: task.finished_at, - error: task.error, - canceled_by: task.canceled_by, - details: task.details, - status: task.status, - kind: match task.kind { - KindDump::DocumentImport { - primary_key, - method, - documents_count, - allow_index_creation, - } => KindWithContent::DocumentAdditionOrUpdate { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - primary_key, - method, - content_file: content_uuid.ok_or(Error::CorruptedDump)?, - documents_count, - allow_index_creation, - }, - KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { - documents_ids, - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - }, - KindDump::DocumentClear => KindWithContent::DocumentClear { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - }, - KindDump::Settings { settings, is_deletion, allow_index_creation } => { - KindWithContent::SettingsUpdate { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - new_settings: settings, - is_deletion, - allow_index_creation, - } - } - KindDump::IndexDeletion => KindWithContent::IndexDeletion { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - }, - KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - primary_key, - }, - KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate { - index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, - primary_key, - }, - KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps }, - KindDump::TaskCancelation { query, tasks } => { - KindWithContent::TaskCancelation { query, tasks } - } - KindDump::TasksDeletion { query, tasks } => { - KindWithContent::TaskDeletion { query, tasks } - } - KindDump::DumpCreation { keys, instance_uid } => { - 
KindWithContent::DumpCreation { keys, instance_uid } - } - KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, - }, - }; - - self.all_tasks.put(&mut wtxn, &BEU32::new(task.uid), &task)?; - - for index in task.indexes() { - self.update_index(&mut wtxn, index, |bitmap| { - bitmap.insert(task.uid); - })?; - } - - self.update_status(&mut wtxn, task.status, |bitmap| { - bitmap.insert(task.uid); - })?; - - self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| { - (bitmap.insert(task.uid)); - })?; - - wtxn.commit()?; - self.wake_up.signal(); - - Ok(task) + pub fn register_dumped_task(&mut self) -> Result { + Dump::new(self) } /// Create a new index without any associated task. @@ -1218,6 +1111,134 @@ impl IndexScheduler { } } +pub struct Dump<'a> { + index_scheduler: &'a IndexScheduler, + wtxn: RwTxn<'a, 'a>, +} + +impl<'a> Dump<'a> { + pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result { + // While loading a dump no one should be able to access the scheduler thus I can block everything. + let wtxn = index_scheduler.env.write_txn()?; + + Ok(Dump { index_scheduler, wtxn }) + } + + /// Register a new task coming from a dump in the scheduler. + /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running. + pub fn register_dumped_task( + &mut self, + task: TaskDump, + content_file: Option>, + ) -> Result { + let content_uuid = match content_file { + Some(content_file) if task.status == Status::Enqueued => { + let (uuid, mut file) = self.index_scheduler.create_update_file()?; + let mut builder = DocumentsBatchBuilder::new(file.as_file_mut()); + for doc in content_file { + builder.append_json_object(&doc?)?; + } + builder.into_inner()?; + file.persist()?; + + Some(uuid) + } + // If the task isn't `Enqueued` then just generate a recognisable `Uuid` + // in case we try to open it later. 
+ _ if task.status != Status::Enqueued => Some(Uuid::nil()), + _ => None, + }; + + let task = Task { + uid: task.uid, + enqueued_at: task.enqueued_at, + started_at: task.started_at, + finished_at: task.finished_at, + error: task.error, + canceled_by: task.canceled_by, + details: task.details, + status: task.status, + kind: match task.kind { + KindDump::DocumentImport { + primary_key, + method, + documents_count, + allow_index_creation, + } => KindWithContent::DocumentAdditionOrUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + method, + content_file: content_uuid.ok_or(Error::CorruptedDump)?, + documents_count, + allow_index_creation, + }, + KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion { + documents_ids, + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::DocumentClear => KindWithContent::DocumentClear { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::Settings { settings, is_deletion, allow_index_creation } => { + KindWithContent::SettingsUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + new_settings: settings, + is_deletion, + allow_index_creation, + } + } + KindDump::IndexDeletion => KindWithContent::IndexDeletion { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + }, + KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + }, + KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate { + index_uid: task.index_uid.ok_or(Error::CorruptedDump)?, + primary_key, + }, + KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps }, + KindDump::TaskCancelation { query, tasks } => { + KindWithContent::TaskCancelation { query, tasks } + } + KindDump::TasksDeletion { query, tasks } => { + KindWithContent::TaskDeletion { query, tasks } + } + KindDump::DumpCreation { keys, instance_uid } => { + 
KindWithContent::DumpCreation { keys, instance_uid } + } + KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, + }, + }; + + self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?; + + for index in task.indexes() { + self.index_scheduler.update_index(&mut self.wtxn, index, |bitmap| { + bitmap.insert(task.uid); + })?; + } + + self.index_scheduler.update_status(&mut self.wtxn, task.status, |bitmap| { + bitmap.insert(task.uid); + })?; + + self.index_scheduler.update_kind(&mut self.wtxn, task.kind.as_kind(), |bitmap| { + (bitmap.insert(task.uid)); + })?; + + Ok(task) + } + + /// Commit all the changes and exit the importing dump state + pub fn finish(self) -> Result<()> { + self.wtxn.commit()?; + self.index_scheduler.wake_up.signal(); + Ok(()) + } +} + /// The outcome of calling the [`IndexScheduler::tick`] function. pub enum TickOutcome { /// The scheduler should immediately attempt another `tick`. diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index 13c236983..98e754e67 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -367,12 +367,14 @@ fn import_dump( log::info!("All documents successfully imported."); } + let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; + // 4. Import the tasks. for ret in dump_reader.tasks()? { let (task, file) = ret?; - index_scheduler.register_dumped_task(task, file)?; + index_scheduler_dump.register_dumped_task(task, file)?; } - Ok(()) + Ok(index_scheduler_dump.finish()?) 
 } pub fn configure_data( From 3fb67f94f796bfe451be20e8852cd908b066a351 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 29 Mar 2023 14:44:15 +0200 Subject: [PATCH 16/22] Reduce the time to import a dump by caching some data With this commit, for a dump containing 1M tasks we went from 1m02 to 6s --- index-scheduler/src/lib.rs | 49 ++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 3e7c85148..296029435 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -31,6 +31,7 @@ mod uuid_codec; pub type Result = std::result::Result; pub type TaskId = u32; +use std::collections::HashMap; use std::ops::{Bound, RangeBounds}; use std::path::{Path, PathBuf}; use std::sync::atomic::AtomicBool; @@ -1114,6 +1115,10 @@ impl IndexScheduler { pub struct Dump<'a> { index_scheduler: &'a IndexScheduler, wtxn: RwTxn<'a, 'a>, + + indexes: HashMap, + statuses: HashMap, + kinds: HashMap, } impl<'a> Dump<'a> { @@ -1121,7 +1126,13 @@ impl<'a> Dump<'a> { // While loading a dump no one should be able to access the scheduler thus I can block everything. let wtxn = index_scheduler.env.write_txn()?; - Ok(Dump { index_scheduler, wtxn }) + Ok(Dump { + index_scheduler, + wtxn, + indexes: HashMap::new(), + statuses: HashMap::new(), + kinds: HashMap::new(), + }) } /// Register a new task coming from a dump in the scheduler. 
@@ -1215,26 +1226,38 @@ impl<'a> Dump<'a> { self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?; for index in task.indexes() { - self.index_scheduler.update_index(&mut self.wtxn, index, |bitmap| { - bitmap.insert(task.uid); - })?; + match self.indexes.get_mut(index) { + Some(bitmap) => { + bitmap.insert(task.uid); + } + None => { + let mut bitmap = RoaringBitmap::new(); + bitmap.insert(task.uid); + self.indexes.insert(index.to_string(), bitmap); + } + }; } - - self.index_scheduler.update_status(&mut self.wtxn, task.status, |bitmap| { - bitmap.insert(task.uid); - })?; - - self.index_scheduler.update_kind(&mut self.wtxn, task.kind.as_kind(), |bitmap| { - (bitmap.insert(task.uid)); - })?; + self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid); + self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid); Ok(task) } /// Commit all the changes and exit the importing dump state - pub fn finish(self) -> Result<()> { + pub fn finish(mut self) -> Result<()> { + for (index, bitmap) in self.indexes { + self.index_scheduler.index_tasks.put(&mut self.wtxn, &index, &bitmap)?; + } + for (status, bitmap) in self.statuses { + self.index_scheduler.put_status(&mut self.wtxn, status, &bitmap)?; + } + for (kind, bitmap) in self.kinds { + self.index_scheduler.put_kind(&mut self.wtxn, kind, &bitmap)?; + } + self.wtxn.commit()?; self.index_scheduler.wake_up.signal(); + Ok(()) } } From 53aa0a1b541108dae11f29225020990b8e2a0ced Mon Sep 17 00:00:00 2001 From: Filip Bachul Date: Thu, 30 Mar 2023 23:17:34 +0200 Subject: [PATCH 17/22] handle _geo(x,x) sort error --- milli/src/asc_desc.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/milli/src/asc_desc.rs b/milli/src/asc_desc.rs index bbc49ea7d..7770f5b35 100644 --- a/milli/src/asc_desc.rs +++ b/milli/src/asc_desc.rs @@ -81,6 +81,7 @@ impl FromStr for Member { if is_reserved_keyword(text) || text.starts_with("_geoRadius(") || 
text.starts_with("_geoBoundingBox(") + || text.starts_with("_geo(") { return Err(AscDescError::ReservedKeyword { name: text.to_string() })?; } @@ -265,6 +266,8 @@ mod tests { ("_geoPoint(0, -180.000001):desc", GeoError(BadGeoError::Lng(-180.000001))), ("_geoPoint(159.256, 130):asc", GeoError(BadGeoError::Lat(159.256))), ("_geoPoint(12, -2021):desc", GeoError(BadGeoError::Lng(-2021.))), + ("_geo(12, -2021):asc", ReservedKeyword { name: S("_geo(12, -2021)") }), + ("_geo(12, -2021):desc", ReservedKeyword { name: S("_geo(12, -2021)") }), ]; for (req, expected_error) in invalid_req { From cb2b5eb38e6b43294e90289f57988ae8e915a265 Mon Sep 17 00:00:00 2001 From: Filip Bachul Date: Thu, 30 Mar 2023 23:21:23 +0200 Subject: [PATCH 18/22] handle _geoDistance(x,x) sort error --- milli/src/asc_desc.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/milli/src/asc_desc.rs b/milli/src/asc_desc.rs index 7770f5b35..038738b97 100644 --- a/milli/src/asc_desc.rs +++ b/milli/src/asc_desc.rs @@ -82,6 +82,7 @@ impl FromStr for Member { || text.starts_with("_geoRadius(") || text.starts_with("_geoBoundingBox(") || text.starts_with("_geo(") + || text.starts_with("_geoDistance(") { return Err(AscDescError::ReservedKeyword { name: text.to_string() })?; } @@ -268,6 +269,8 @@ mod tests { ("_geoPoint(12, -2021):desc", GeoError(BadGeoError::Lng(-2021.))), ("_geo(12, -2021):asc", ReservedKeyword { name: S("_geo(12, -2021)") }), ("_geo(12, -2021):desc", ReservedKeyword { name: S("_geo(12, -2021)") }), + ("_geoDistance(12, -2021):asc", ReservedKeyword { name: S("_geoDistance(12, -2021)") }), + ("_geoDistance(12, -2021):desc", ReservedKeyword { name: S("_geoDistance(12, -2021)") }), ]; for (req, expected_error) in invalid_req { From 1861c69964ae54572be1eb3265f9f68c1bee0d7b Mon Sep 17 00:00:00 2001 From: Filip Bachul Date: Thu, 30 Mar 2023 23:37:26 +0200 Subject: [PATCH 19/22] fmt --- milli/src/asc_desc.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/milli/src/asc_desc.rs 
b/milli/src/asc_desc.rs index 038738b97..bde0dd440 100644 --- a/milli/src/asc_desc.rs +++ b/milli/src/asc_desc.rs @@ -270,7 +270,10 @@ mod tests { ("_geo(12, -2021):asc", ReservedKeyword { name: S("_geo(12, -2021)") }), ("_geo(12, -2021):desc", ReservedKeyword { name: S("_geo(12, -2021)") }), ("_geoDistance(12, -2021):asc", ReservedKeyword { name: S("_geoDistance(12, -2021)") }), - ("_geoDistance(12, -2021):desc", ReservedKeyword { name: S("_geoDistance(12, -2021)") }), + ( + "_geoDistance(12, -2021):desc", + ReservedKeyword { name: S("_geoDistance(12, -2021)") }, + ), ]; for (req, expected_error) in invalid_req { From 0177d66149c3d1bcaecfeba7c4ea554aafbb3410 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 1 Apr 2023 17:58:46 +0000 Subject: [PATCH 20/22] Bump Swatinem/rust-cache from 2.2.0 to 2.2.1 Bumps [Swatinem/rust-cache](https://github.com/Swatinem/rust-cache) from 2.2.0 to 2.2.1. - [Release notes](https://github.com/Swatinem/rust-cache/releases) - [Changelog](https://github.com/Swatinem/rust-cache/blob/master/CHANGELOG.md) - [Commits](https://github.com/Swatinem/rust-cache/compare/v2.2.0...v2.2.1) --- updated-dependencies: - dependency-name: Swatinem/rust-cache dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- .github/workflows/test-suite.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index e08b77e3d..820fcb656 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -43,7 +43,7 @@ jobs: toolchain: nightly override: true - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.0 + uses: Swatinem/rust-cache@v2.2.1 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -65,7 +65,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.0 + uses: Swatinem/rust-cache@v2.2.1 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -123,7 +123,7 @@ jobs: toolchain: stable override: true - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.0 + uses: Swatinem/rust-cache@v2.2.1 - name: Run tests in debug uses: actions-rs/cargo@v1 with: @@ -142,7 +142,7 @@ jobs: override: true components: clippy - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.0 + uses: Swatinem/rust-cache@v2.2.1 - name: Run cargo clippy uses: actions-rs/cargo@v1 with: @@ -162,7 +162,7 @@ jobs: override: true components: rustfmt - name: Cache dependencies - uses: Swatinem/rust-cache@v2.2.0 + uses: Swatinem/rust-cache@v2.2.1 - name: Run cargo fmt # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file. 
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate From 590b1d8fb7fbefb1b472cb10b65cafc7ade9c067 Mon Sep 17 00:00:00 2001 From: cvermand <33010418+bidoubiwa@users.noreply.github.com> Date: Mon, 3 Apr 2023 13:14:20 +0200 Subject: [PATCH 21/22] Add a newline after the meilisearch version in the issue template Just a small change to make it easier in removing the version example and is consistent with the other examples in its positioning. --- .github/ISSUE_TEMPLATE/bug_report.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 4ce2db180..1a7f49aae 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -23,7 +23,8 @@ A clear and concise description of what you expected to happen. **Screenshots** If applicable, add screenshots to help explain your problem. -**Meilisearch version:** [e.g. v0.20.0] +**Meilisearch version:** +[e.g. v0.20.0] **Additional context** Additional information that may be relevant to the issue. 
From 3508ba2f20c301ced1d75db7f3101e5bdbd1b37e Mon Sep 17 00:00:00 2001 From: curquiza Date: Tue, 4 Apr 2023 18:58:43 +0200 Subject: [PATCH 22/22] Add sprint issue to the template issues --- .github/ISSUE_TEMPLATE/sprint_issue.md | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/sprint_issue.md diff --git a/.github/ISSUE_TEMPLATE/sprint_issue.md b/.github/ISSUE_TEMPLATE/sprint_issue.md new file mode 100644 index 000000000..f6303e362 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/sprint_issue.md @@ -0,0 +1,34 @@ +--- +name: New sprint issue +about: ⚠️ Should only be used by the engine team ⚠️ +title: '' +labels: '' +assignees: '' + +--- + +Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_) +Related product discussion: +Related spec: WIP + +## Motivation + + + +## Usage + + + +Refer to the final spec to know the details and the final decisions about the usage. + +## TODO + + + +- [ ] Release a prototype +- [ ] If prototype validated, merge changes into `main` +- [ ] Update the spec + +## Impacted teams + +