From 07bb32b6225ae2068e6551b5f570347150656caa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Mon, 29 Aug 2022 18:26:01 +0200 Subject: [PATCH 01/44] Update ubuntu-18.04 to 20.04 --- .github/workflows/coverage.yml | 2 +- .github/workflows/flaky.yml | 2 +- .github/workflows/publish-binaries.yml | 6 +++--- .github/workflows/publish-deb-brew-pkg.yml | 4 ++-- .github/workflows/rust.yml | 8 ++++---- bors.toml | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 3a10a611f..acef34200 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -6,7 +6,7 @@ name: Execute code coverage jobs: nightly-coverage: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 diff --git a/.github/workflows/flaky.yml b/.github/workflows/flaky.yml index 8d34da4d9..3ba11fe77 100644 --- a/.github/workflows/flaky.yml +++ b/.github/workflows/flaky.yml @@ -5,7 +5,7 @@ on: jobs: flaky: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index a9fa50223..a2f43d867 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -35,9 +35,9 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-18.04, macos-latest, windows-latest] + os: [ubuntu-20.04, macos-latest, windows-latest] include: - - os: ubuntu-18.04 + - os: ubuntu-20.04 artifact_name: meilisearch asset_name: meilisearch-linux-amd64 - os: macos-latest @@ -72,7 +72,7 @@ jobs: matrix: include: - build: aarch64 - os: ubuntu-18.04 + os: ubuntu-20.04 target: aarch64-unknown-linux-gnu linker: gcc-aarch64-linux-gnu use-cross: true diff --git a/.github/workflows/publish-deb-brew-pkg.yml b/.github/workflows/publish-deb-brew-pkg.yml index 96bf9af9e..b5fc330bf 100644 --- 
a/.github/workflows/publish-deb-brew-pkg.yml +++ b/.github/workflows/publish-deb-brew-pkg.yml @@ -15,7 +15,7 @@ jobs: debian: name: Publish debian packagge - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 needs: check-version steps: - uses: hecrj/setup-rust-action@master @@ -38,7 +38,7 @@ jobs: homebrew: name: Bump Homebrew formula - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 needs: check-version steps: - name: Create PR to Homebrew diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 266e306d6..0e92fc706 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-18.04, macos-latest, windows-latest] + os: [ubuntu-20.04, macos-latest, windows-latest] steps: - uses: actions/checkout@v3 - name: Cache dependencies @@ -40,7 +40,7 @@ jobs: # We run tests in debug also, to make sure that the debug_assertions are hit test-debug: name: Run tests in debug - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 @@ -58,7 +58,7 @@ jobs: clippy: name: Run Clippy - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 @@ -77,7 +77,7 @@ jobs: fmt: name: Run Rustfmt - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 - uses: actions-rs/toolchain@v1 diff --git a/bors.toml b/bors.toml index b357e8d61..a29054dfb 100644 --- a/bors.toml +++ b/bors.toml @@ -1,5 +1,5 @@ status = [ - 'Tests on ubuntu-18.04', + 'Tests on ubuntu-20.04', 'Tests on macos-latest', 'Tests on windows-latest', 'Run Clippy', From 80b1f3e83021fcc0afef161ae371ef12c8e302b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 31 Aug 2022 17:28:42 +0200 Subject: [PATCH 02/44] Add dry run for publishing binaries: check the compilation works --- .github/workflows/publish-binaries.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 
deletion(-) diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index a2f43d867..0c13140c9 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -1,4 +1,6 @@ on: + schedule: + - cron: '0 2 * * *' # Every day at 2:00am release: types: [published] @@ -8,13 +10,14 @@ jobs: check-version: name: Check the version validity runs-on: ubuntu-latest + # No need to check the version for dry run (cron) + if: github.event_name != 'schedule' steps: - uses: actions/checkout@v2 # Check if the tag has the v.. format. # If yes, it means we are publishing an official release. # If no, we are releasing a RC, so no need to check the version. - name: Check tag format - if: github.event_name != 'schedule' id: check-tag-format run: | escaped_tag=$(printf "%q" ${{ github.ref_name }}) @@ -54,7 +57,9 @@ jobs: - uses: actions/checkout@v3 - name: Build run: cargo build --release --locked + # No need to upload binaries for dry run (cron) - name: Upload binaries to release + if: github.event_name != 'schedule' uses: svenstaro/upload-release-action@v1-release with: repo_token: ${{ secrets.PUBLISH_TOKEN }} @@ -123,6 +128,8 @@ jobs: run: ls -lR ./target - name: Upload the binary to release + # No need to upload binaries for dry run (cron) + if: github.event_name != 'schedule' uses: svenstaro/upload-release-action@v1-release with: repo_token: ${{ secrets.PUBLISH_TOKEN }} From 60792eebcf33785126f10e9e054638405b3a0e78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Wi=C5=9Bniewski?= Date: Wed, 31 Aug 2022 19:29:53 +0200 Subject: [PATCH 03/44] Fix #2207 - do not panic when the error message length is between 100 and 135 --- meilisearch-lib/src/document_formats.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index ebc98f3fb..be48c9bbf 100644 --- a/meilisearch-lib/src/document_formats.rs +++ 
b/meilisearch-lib/src/document_formats.rs @@ -44,7 +44,7 @@ impl Display for DocumentFormatError { // The user input maybe insanely long. We need to truncate it. let mut serde_msg = se.to_string(); let ellipsis = "..."; - if serde_msg.len() > 100 + ellipsis.len() { + if serde_msg.len() > (50 + 85) + ellipsis.len() { serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis); } From 8f3b590436dcd905a561a86d481ce8995880cd2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Thu, 1 Sep 2022 13:34:34 +0200 Subject: [PATCH 04/44] Move if conditions to the steps and not to the whole job --- .github/workflows/publish-binaries.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 0c13140c9..65d4746ba 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -11,13 +11,13 @@ jobs: name: Check the version validity runs-on: ubuntu-latest # No need to check the version for dry run (cron) - if: github.event_name != 'schedule' steps: - uses: actions/checkout@v2 # Check if the tag has the v.. format. # If yes, it means we are publishing an official release. # If no, we are releasing a RC, so no need to check the version. 
- name: Check tag format + if: github.event_name != 'schedule' id: check-tag-format run: | escaped_tag=$(printf "%q" ${{ github.ref_name }}) @@ -28,7 +28,7 @@ jobs: echo ::set-output name=stable::false fi - name: Check release validity - if: steps.check-tag-format.outputs.stable == 'true' + if: github.event_name != 'schedule' && steps.check-tag-format.outputs.stable == 'true' run: bash .github/scripts/check-release.sh publish: From d1b364292322028d4ea4fea820b70c13a3bac222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Wi=C5=9Bniewski?= Date: Thu, 1 Sep 2022 20:50:11 +0200 Subject: [PATCH 05/44] Extract input to trim lengths to variables --- meilisearch-lib/src/document_formats.rs | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index be48c9bbf..a0b3c0552 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -44,8 +44,15 @@ impl Display for DocumentFormatError { // The user input maybe insanely long. We need to truncate it. 
let mut serde_msg = se.to_string(); let ellipsis = "..."; - if serde_msg.len() > (50 + 85) + ellipsis.len() { - serde_msg.replace_range(50..serde_msg.len() - 85, ellipsis); + let trim_input_prefix_len = 50; + let trim_input_suffix_len = 85; + + if serde_msg.len() > trim_input_prefix_len + trim_input_suffix_len + ellipsis.len() + { + serde_msg.replace_range( + trim_input_prefix_len..serde_msg.len() - trim_input_suffix_len, + ellipsis, + ); } write!( @@ -136,9 +143,14 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { let content: ArrayOrSingleObject = serde_json::from_reader(reader) .map_err(Error::Json) - .map_err(|e| (PayloadType::Json, e))?; + .map_err(|e| { + println!("Błąd o taki: {:#?}", e); + (PayloadType::Json, e) + })?; + println!("content o taki: {:#?}", content); for object in content.inner.map_right(|o| vec![o]).into_inner() { + println!("{:#?}", object); builder .append_json_object(&object) .map_err(Into::into) @@ -146,6 +158,8 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { } let count = builder.documents_count(); + println!("{count}"); + let _ = builder .into_inner() .map_err(Into::into) From 3878c289dfe8a301374ab5c83eabe66205cbf6cf Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Thu, 1 Sep 2022 22:34:20 +0200 Subject: [PATCH 06/44] feat: add missing env var for dumps and snapshots feature --- meilisearch-http/src/option.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 6848e693d..3ca967b0e 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -103,15 +103,15 @@ pub struct Opt { /// Defines the path of the snapshot file to import. /// This option will, by default, stop the process if a database already exist or if no snapshot exists at /// the given path. If this option is not specified no snapshot is imported. 
- #[clap(long)] + #[clap(long, env = "MEILI_IMPORT_SNAPSHOT")] pub import_snapshot: Option, /// The engine will ignore a missing snapshot and not return an error in such case. - #[clap(long, requires = "import-snapshot")] + #[clap(long, env = "MEILI_IGNORE_MISSING_SNAPSHOT", requires = "import-snapshot")] pub ignore_missing_snapshot: bool, /// The engine will skip snapshot importation and not return an error in such case. - #[clap(long, requires = "import-snapshot")] + #[clap(long, env = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS", requires = "import-snapshot")] pub ignore_snapshot_if_db_exists: bool, /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. @@ -127,15 +127,15 @@ pub struct Opt { pub snapshot_interval_sec: u64, /// Import a dump from the specified path, must be a `.dump` file. - #[clap(long, conflicts_with = "import-snapshot")] + #[clap(long, env = "MEILI_IMPORT_DUMP", conflicts_with = "import-snapshot")] pub import_dump: Option, /// If the dump doesn't exists, load or create the database specified by `db-path` instead. - #[clap(long, requires = "import-dump")] + #[clap(long, env = "MEILI_IGNORE_MISSING_DUMP", requires = "import-dump")] pub ignore_missing_dump: bool, /// Ignore the dump if a database already exists, and load that database instead. - #[clap(long, requires = "import-dump")] + #[clap(long, env = "MEILI_IGNORE_DUMP_IF_DB_EXISTS", requires = "import-dump")] pub ignore_dump_if_db_exists: bool, /// Folder where dumps are created when the dump route is called. 
From d0f1054f5c89ad1b3d2c3ed9d1895af75a485842 Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Thu, 1 Sep 2022 22:37:07 +0200 Subject: [PATCH 07/44] chore: cargo fmt --- meilisearch-http/src/option.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 3ca967b0e..bdfa283a6 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -107,11 +107,19 @@ pub struct Opt { pub import_snapshot: Option, /// The engine will ignore a missing snapshot and not return an error in such case. - #[clap(long, env = "MEILI_IGNORE_MISSING_SNAPSHOT", requires = "import-snapshot")] + #[clap( + long, + env = "MEILI_IGNORE_MISSING_SNAPSHOT", + requires = "import-snapshot" + )] pub ignore_missing_snapshot: bool, /// The engine will skip snapshot importation and not return an error in such case. - #[clap(long, env = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS", requires = "import-snapshot")] + #[clap( + long, + env = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS", + requires = "import-snapshot" + )] pub ignore_snapshot_if_db_exists: bool, /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. 
From ae14567f9703537de2be151ef6ec544df7f3dbd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Fri, 2 Sep 2022 16:22:21 +0200 Subject: [PATCH 08/44] Add CI manifest to automate some step of the release management when creating/closing a Milestone --- .../release-management-automation.yml | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 .github/workflows/release-management-automation.yml diff --git a/.github/workflows/release-management-automation.yml b/.github/workflows/release-management-automation.yml new file mode 100644 index 000000000..5386a76fb --- /dev/null +++ b/.github/workflows/release-management-automation.yml @@ -0,0 +1,164 @@ +name: Automate some steps of release management + +# /!\ No git flow are handled here + +# For each Milestone created (not opened!), and if the release is NOT a patch release (only the patch changed) +# - the roadmap issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/roadmap-issue.md +# - the changelog issue is created, see https://github.com/meilisearch/core-team/blob/main/issue-templates/changelog-issue.md + +# For each Milestone closed +# - the `release_version` label is created +# - this label is applied to all issues/PRs in the Milestone + +on: + milestone: + types: [created, closed] + +jobs: + +# ----------------- +# MILESTONE CREATED +# ----------------- + + get-release-version: + if: github.event.action == 'created' + runs-on: ubuntu-latest + outputs: + is-patch: ${{ steps.check-patch.outputs.is-patch }} + env: + MILESTONE_VERSION: ${{ github.event.milestone.title }} + steps: + - uses: actions/checkout@v3 + - name: Check if this release is a patch release only + id: check-patch + run: | + echo version: $MILESTONE_VERSION + if [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.0$ ]]; then + echo 'This is NOT a patch release' + echo ::set-output name=is-patch::false + elif [[ $MILESTONE_VERSION =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo 
'This is a patch release' + echo ::set-output name=is-patch::true + else + echo "Not a valid format of release, check the Milestone's title." + echo 'Should be vX.Y.Z' + exit 1 + fi + + create-roadmap-issue: + needs: get-release-version + # Create the roadmap issue if the release is not only a patch release + if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false' + runs-on: ubuntu-latest + env: + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + ISSUE_TEMPLATE: issue-template.md + MILESTONE_VERSION: ${{ github.event.milestone.title }} + MILESTONE_URL: ${{ github.event.milestone.html_url }} + MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }} + steps: + - uses: actions/checkout@v3 + - name: Download the issue template + run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE + - name: Replace all empty occurences in the templates + run: | + # Replace all <> occurences + sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE + + # Replace all <> occurences + milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7) + sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE + + # Replace release date if exists + if [[ ! 
-z $MILESTONE_DUE_ON ]]; then + date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1) + sed -i "s/Release date\: 20XX-XX-XX/Release date\: $date/g" $ISSUE_TEMPLATE + fi + - name: Create the issue + run: | + gh issue create \ + --title "$MILESTONE_VERSION ROADMAP" \ + --label 'epic,impacts docs,impacts integrations,impacts cloud' \ + --body-file $ISSUE_TEMPLATE \ + --milestone $MILESTONE_VERSION + + create-changelog-issue: + needs: get-release-version + # Create the changelog issue if the release is not only a patch release + if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false' + runs-on: ubuntu-latest + env: + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + ISSUE_TEMPLATE: issue-template.md + MILESTONE_VERSION: ${{ github.event.milestone.title }} + MILESTONE_URL: ${{ github.event.milestone.html_url }} + steps: + - uses: actions/checkout@v3 + - name: Download the issue template + run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE + - name: Replace all empty occurences in the templates + run: | + # Replace all <> occurences + sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE + + # Replace all <> occurences + milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7) + sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE + - name: Create the issue + run: | + gh issue create \ + --title "Create release changelogs for $MILESTONE_VERSION" \ + --label 'impacts docs,documentation' \ + --body-file $ISSUE_TEMPLATE \ + --milestone $MILESTONE_VERSION \ + --assignee curquiza + +# ---------------- +# MILESTONE CLOSED +# ---------------- + + create-release-label: + if: github.event.action == 'closed' + runs-on: ubuntu-latest + env: + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + MILESTONE_VERSION: ${{ github.event.milestone.title }} + MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }} + steps: + - uses: actions/checkout@v3 + - name: Create the $MILESTONE_VERSION label + run: | + 
label_description="PRs/issues solved in $MILESTONE_VERSION" + if [[ ! -z $MILESTONE_DUE_ON ]]; then + date=$(echo $MILESTONE_DUE_ON | cut -d 'T' -f 1) + label_description="$label_description released on $date" + fi + + gh api repos/curquiza/meilisearch/labels \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -f name="$MILESTONE_VERSION" \ + -f description="$label_description" \ + -f color='ff5ba3' + + labelize-all-milestone-content: + if: github.event.action == 'closed' + needs: create-release-label + runs-on: ubuntu-latest + env: + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + MILESTONE_VERSION: ${{ github.event.milestone.title }} + steps: + - uses: actions/checkout@v3 + - name: Add label $MILESTONE_VERSION to all PRs in the Milestone + run: | + prs=$(gh pr list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}') + for pr in $prs; do + gh pr $pr edit --add-label $MILESTONE_VERSION + done + - name: Add label $MILESTONE_VERSION to all issues in the Milestone + run: | + issues=$(gh issue list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}') + for issue in $issues; do + gh issue edit $issue --add-label $MILESTONE_VERSION + done From 2eca723a915b2ca2714aff5b73b8eec29a6a0b38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Fri, 2 Sep 2022 16:45:32 +0200 Subject: [PATCH 09/44] Update checkout v2 to v3 in CI manifests --- .github/workflows/publish-binaries.yml | 2 +- .github/workflows/publish-deb-brew-pkg.yml | 2 +- .github/workflows/publish-docker-images.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 65d4746ba..9eecaf908 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -12,7 +12,7 
@@ jobs: runs-on: ubuntu-latest # No need to check the version for dry run (cron) steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Check if the tag has the v.. format. # If yes, it means we are publishing an official release. # If no, we are releasing a RC, so no need to check the version. diff --git a/.github/workflows/publish-deb-brew-pkg.yml b/.github/workflows/publish-deb-brew-pkg.yml index b5fc330bf..1dc56f940 100644 --- a/.github/workflows/publish-deb-brew-pkg.yml +++ b/.github/workflows/publish-deb-brew-pkg.yml @@ -9,7 +9,7 @@ jobs: name: Check the version validity runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check release validity run: bash .github/scripts/check-release.sh diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml index 72234fc01..88605bee1 100644 --- a/.github/workflows/publish-docker-images.yml +++ b/.github/workflows/publish-docker-images.yml @@ -12,7 +12,7 @@ jobs: docker: runs-on: docker steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Check if the tag has the v.. format. If yes, it means we are publishing an official release. 
# In this situation, we need to set `output.stable` to create/update the following tags (additionally to the `vX.Y.Z` Docker tag): From cc09aa8868d7fd4b4f784e991e74ac42a28c3741 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Fri, 2 Sep 2022 17:07:23 +0200 Subject: [PATCH 10/44] Refacto env var --- .../release-management-automation.yml | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/.github/workflows/release-management-automation.yml b/.github/workflows/release-management-automation.yml index 5386a76fb..b508b1271 100644 --- a/.github/workflows/release-management-automation.yml +++ b/.github/workflows/release-management-automation.yml @@ -14,6 +14,12 @@ on: milestone: types: [created, closed] +env: + MILESTONE_VERSION: ${{ github.event.milestone.title }} + MILESTONE_URL: ${{ github.event.milestone.html_url }} + MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }} + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + jobs: # ----------------- @@ -51,11 +57,7 @@ jobs: if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false' runs-on: ubuntu-latest env: - GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} ISSUE_TEMPLATE: issue-template.md - MILESTONE_VERSION: ${{ github.event.milestone.title }} - MILESTONE_URL: ${{ github.event.milestone.html_url }} - MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }} steps: - uses: actions/checkout@v3 - name: Download the issue template @@ -88,10 +90,7 @@ jobs: if: github.event.action == 'created' && needs.get-release-version.outputs.is-patch == 'false' runs-on: ubuntu-latest env: - GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} ISSUE_TEMPLATE: issue-template.md - MILESTONE_VERSION: ${{ github.event.milestone.title }} - MILESTONE_URL: ${{ github.event.milestone.html_url }} steps: - uses: actions/checkout@v3 - name: Download the issue template @@ -120,10 +119,6 @@ jobs: create-release-label: if: github.event.action == 'closed' runs-on: ubuntu-latest - 
env: - GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} - MILESTONE_VERSION: ${{ github.event.milestone.title }} - MILESTONE_DUE_ON: ${{ github.event.milestone.due_on }} steps: - uses: actions/checkout@v3 - name: Create the $MILESTONE_VERSION label @@ -145,9 +140,6 @@ jobs: if: github.event.action == 'closed' needs: create-release-label runs-on: ubuntu-latest - env: - GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} - MILESTONE_VERSION: ${{ github.event.milestone.title }} steps: - uses: actions/checkout@v3 - name: Add label $MILESTONE_VERSION to all PRs in the Milestone From 9cb1e4af5c88c9d9cddedf302cdcb40fb04a5c95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Sat, 3 Sep 2022 17:46:37 +0200 Subject: [PATCH 11/44] Rename workflow file --- ...release-management-automation.yml => milestone-workflow.yml} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename .github/workflows/{release-management-automation.yml => milestone-workflow.yml} (99%) diff --git a/.github/workflows/release-management-automation.yml b/.github/workflows/milestone-workflow.yml similarity index 99% rename from .github/workflows/release-management-automation.yml rename to .github/workflows/milestone-workflow.yml index b508b1271..0bec41d70 100644 --- a/.github/workflows/release-management-automation.yml +++ b/.github/workflows/milestone-workflow.yml @@ -1,4 +1,4 @@ -name: Automate some steps of release management +name: Milestone's workflow # /!\ No git flow are handled here From d0aa8042e2664b520fef45a49f269ea994119ba3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Sat, 3 Sep 2022 17:53:37 +0200 Subject: [PATCH 12/44] Fix job names --- .github/workflows/milestone-workflow.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/milestone-workflow.yml b/.github/workflows/milestone-workflow.yml index 0bec41d70..4d0425f14 100644 --- a/.github/workflows/milestone-workflow.yml +++ 
b/.github/workflows/milestone-workflow.yml @@ -121,7 +121,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Create the $MILESTONE_VERSION label + - name: Create the ${{ env.MILESTONE_VERSION }} label run: | label_description="PRs/issues solved in $MILESTONE_VERSION" if [[ ! -z $MILESTONE_DUE_ON ]]; then @@ -142,13 +142,13 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Add label $MILESTONE_VERSION to all PRs in the Milestone + - name: Add label ${{ env.MILESTONE_VERSION }} to all PRs in the Milestone run: | prs=$(gh pr list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}') for pr in $prs; do gh pr $pr edit --add-label $MILESTONE_VERSION done - - name: Add label $MILESTONE_VERSION to all issues in the Milestone + - name: Add label ${{ env.MILESTONE_VERSION }} to all issues in the Milestone run: | issues=$(gh issue list --search milestone:"$MILESTONE_VERSION" --limit 1000 --state all --json number --template '{{range .}}{{tablerow (printf "%v" .number)}}{{end}}') for issue in $issues; do From 5f5b787483272b409037862e0ee5fb6286bb30b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Sun, 4 Sep 2022 11:32:22 +0200 Subject: [PATCH 13/44] Use meili-bot PAT everywhere --- .github/workflows/create-issue-dependencies.yml | 6 +++--- .github/workflows/publish-binaries.yml | 4 ++-- .github/workflows/publish-deb-brew-pkg.yml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/create-issue-dependencies.yml b/.github/workflows/create-issue-dependencies.yml index 638088c2e..e3deebe2a 100644 --- a/.github/workflows/create-issue-dependencies.yml +++ b/.github/workflows/create-issue-dependencies.yml @@ -3,7 +3,7 @@ on: schedule: - cron: '0 0 1 */3 *' workflow_dispatch: - + jobs: create-issue: runs-on: ubuntu-latest @@ -12,12 +12,12 @@ jobs: - name: Create an issue uses: 
actions-ecosystem/action-create-issue@v1 with: - github_token: ${{ secrets.GITHUB_TOKEN }} + github_token: ${{ secrets.MEILI_BOT_GH_PAT }} title: Upgrade dependencies body: | We need to update the dependencies of the Meilisearch repository, and, if possible, the dependencies of all the core-team repositories that Meilisearch depends on (milli, charabia, heed...). - ⚠️ This issue should only be done at the beginning of the sprint! + ⚠️ This issue should only be done at the beginning of the sprint! labels: | dependencies maintenance diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 9eecaf908..95088b1ef 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -62,7 +62,7 @@ jobs: if: github.event_name != 'schedule' uses: svenstaro/upload-release-action@v1-release with: - repo_token: ${{ secrets.PUBLISH_TOKEN }} + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/release/${{ matrix.artifact_name }} asset_name: ${{ matrix.asset_name }} tag: ${{ github.ref }} @@ -132,7 +132,7 @@ jobs: if: github.event_name != 'schedule' uses: svenstaro/upload-release-action@v1-release with: - repo_token: ${{ secrets.PUBLISH_TOKEN }} + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/${{ matrix.target }}/release/meilisearch asset_name: ${{ matrix.asset_name }} tag: ${{ github.ref }} diff --git a/.github/workflows/publish-deb-brew-pkg.yml b/.github/workflows/publish-deb-brew-pkg.yml index 1dc56f940..a135ddafb 100644 --- a/.github/workflows/publish-deb-brew-pkg.yml +++ b/.github/workflows/publish-deb-brew-pkg.yml @@ -29,7 +29,7 @@ jobs: - name: Upload debian pkg to release uses: svenstaro/upload-release-action@v1-release with: - repo_token: ${{ secrets.GITHUB_TOKEN }} + repo_token: ${{ secrets.MEILI_BOT_GH_PAT }} file: target/debian/meilisearch.deb asset_name: meilisearch.deb tag: ${{ github.ref }} From b897ce8dfa51841b0430c0eb16ccb75e02e64f56 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Sun, 4 Sep 2022 11:55:00 +0200 Subject: [PATCH 14/44] Add CI to update th Meilisearch version in Cargo.toml files --- .../workflows/update-cargo-toml-version.yml | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/workflows/update-cargo-toml-version.yml diff --git a/.github/workflows/update-cargo-toml-version.yml b/.github/workflows/update-cargo-toml-version.yml new file mode 100644 index 000000000..e3613a3b9 --- /dev/null +++ b/.github/workflows/update-cargo-toml-version.yml @@ -0,0 +1,47 @@ +name: Update Meilisearch version in all Cargo.toml files + +on: + workflow_dispatch: + inputs: + new_version: + description: 'The new version (vX.Y.Z)' + required: true + +env: + NEW_VERSION: ${{ github.event.inputs.new_version }} + GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} + +jobs: + + update-version-cargo-toml: + name: Update version in cargo.toml files + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v3 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Install sd + run: cargo install sd + - name: Update files + run: | + echo "$GITHUB_REF_NAME" + raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2) + new_string="version = \"$raw_new_version\"" + sd '^version = "\d+.\d+.\w+"$' "$new_string" */Cargo.toml + - name: Build Meilisearch to update Cargo.lock + run: cargo build + - name: Commits and push the changes to the ${{ github.ref_name }} branch + uses: EndBug/add-and-commit@v9 + with: + message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml files" + new_branch: update-version-${{ env.NEW_VERSION }} + - name: Create the PR + run: | + gh pr create \ + --title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \ + --body '⚠️ This PR is automatically generated. Check the new version is the expected one before merging.' 
\ + --label 'skip changelog' \ + --milestone $NEW_VERSION From 55aa83d75a776d21823c6298406f4beff3c10245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Mon, 5 Sep 2022 16:41:01 +0200 Subject: [PATCH 15/44] Minor fixes in the just added update-version CI --- .github/workflows/update-cargo-toml-version.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/update-cargo-toml-version.yml b/.github/workflows/update-cargo-toml-version.yml index e3613a3b9..968b5f050 100644 --- a/.github/workflows/update-cargo-toml-version.yml +++ b/.github/workflows/update-cargo-toml-version.yml @@ -9,12 +9,13 @@ on: env: NEW_VERSION: ${{ github.event.inputs.new_version }} + NEW_BRANCH: update-version-${{ github.event.inputs.new_version }} GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }} jobs: update-version-cargo-toml: - name: Update version in cargo.toml files + name: Update version in Cargo.toml files runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v3 @@ -25,20 +26,19 @@ jobs: override: true - name: Install sd run: cargo install sd - - name: Update files + - name: Update Cargo.toml files run: | - echo "$GITHUB_REF_NAME" raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2) new_string="version = \"$raw_new_version\"" sd '^version = "\d+.\d+.\w+"$' "$new_string" */Cargo.toml - name: Build Meilisearch to update Cargo.lock run: cargo build - - name: Commits and push the changes to the ${{ github.ref_name }} branch + - name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch uses: EndBug/add-and-commit@v9 with: message: "Update version for the next release (${{ env.NEW_VERSION }}) in Cargo.toml files" - new_branch: update-version-${{ env.NEW_VERSION }} - - name: Create the PR + new_branch: ${{ env.NEW_BRANCH }} + - name: Create the PR pointing to ${{ github.ref_name }} run: | gh pr create \ --title "Update version for the next release ($NEW_VERSION) in Cargo.toml files" \ From 
403226a02929e44ef4a913a68359f62fb9fa7326 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Tue, 6 Sep 2022 09:23:16 +0200 Subject: [PATCH 16/44] Add support for config file --- Cargo.lock | 1 + meilisearch-http/Cargo.toml | 1 + meilisearch-http/src/main.rs | 5 +- meilisearch-http/src/option.rs | 172 ++++++++++++++++++++++++++------- meilisearch-lib/src/lib.rs | 13 +++ meilisearch-lib/src/options.rs | 50 ++++++++-- 6 files changed, 196 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index babbd0ab2..18375d2fb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2087,6 +2087,7 @@ dependencies = [ "time 0.3.9", "tokio", "tokio-stream", + "toml", "urlencoding", "uuid", "vergen", diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 38f9a83fc..88be3c154 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -76,6 +76,7 @@ thiserror = "1.0.30" time = { version = "0.3.7", features = ["serde-well-known", "formatting", "parsing", "macros"] } tokio = { version = "1.17.0", features = ["full"] } tokio-stream = "0.1.8" +toml = "0.5.9" uuid = { version = "1.1.2", features = ["serde", "v4"] } walkdir = "2.3.2" prometheus = { version = "0.13.0", features = ["process"], optional = true } diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 9627aeef8..e74c1e056 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use actix_web::http::KeepAlive; use actix_web::HttpServer; -use clap::Parser; use meilisearch_auth::AuthController; use meilisearch_http::analytics; use meilisearch_http::analytics::Analytics; @@ -29,7 +28,9 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { #[actix_web::main] async fn main() -> anyhow::Result<()> { - let opt = Opt::parse(); + let opt = Opt::build(); + + println!("{:?}", opt); setup(&opt)?; diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index bdfa283a6..143dfb231 100644 --- 
a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -4,8 +4,11 @@ use std::path::PathBuf; use std::sync::Arc; use byte_unit::Byte; -use clap::Parser; -use meilisearch_lib::options::{IndexerOpts, SchedulerConfig}; +use clap::{Arg, Command, Parser}; +use meilisearch_lib::{ + export_to_env_if_not_present, + options::{IndexerOpts, SchedulerConfig}, +}; use rustls::{ server::{ AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, @@ -14,90 +17,114 @@ use rustls::{ RootCertStore, }; use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys}; -use serde::Serialize; +use serde::{Deserialize, Serialize}; const POSSIBLE_ENV: [&str; 2] = ["development", "production"]; -#[derive(Debug, Clone, Parser, Serialize)] +const MEILI_DB_PATH: &str = "MEILI_DB_PATH"; +const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR"; +const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY"; +const MEILI_ENV: &str = "MEILI_ENV"; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS"; +const MEILI_MAX_INDEX_SIZE: &str = "MEILI_MAX_INDEX_SIZE"; +const MEILI_MAX_TASK_DB_SIZE: &str = "MEILI_MAX_TASK_DB_SIZE"; +const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT"; +const MEILI_SSL_CERT_PATH: &str = "MEILI_SSL_CERT_PATH"; +const MEILI_SSL_KEY_PATH: &str = "MEILI_SSL_KEY_PATH"; +const MEILI_SSL_AUTH_PATH: &str = "MEILI_SSL_AUTH_PATH"; +const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH"; +const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH"; +const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION"; +const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS"; +const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR"; +const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT"; +const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC"; +const MEILI_DUMPS_DIR: &str = "MEILI_DUMPS_DIR"; +const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; +#[cfg(feature = "metrics")] +const 
MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE"; + +#[derive(Debug, Clone, Parser, Serialize, Deserialize)] #[clap(version)] pub struct Opt { /// The destination where the database must be created. - #[clap(long, env = "MEILI_DB_PATH", default_value = "./data.ms")] + #[clap(long, env = MEILI_DB_PATH, default_value = "./data.ms")] pub db_path: PathBuf, /// The address on which the http server will listen. - #[clap(long, env = "MEILI_HTTP_ADDR", default_value = "127.0.0.1:7700")] + #[clap(long, env = MEILI_HTTP_ADDR, default_value = "127.0.0.1:7700")] pub http_addr: String, /// The master key allowing you to do everything on the server. - #[serde(skip)] - #[clap(long, env = "MEILI_MASTER_KEY")] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_MASTER_KEY)] pub master_key: Option, /// This environment variable must be set to `production` if you are running in production. /// If the server is running in development mode more logs will be displayed, /// and the master key can be avoided which implies that there is no security on the updates routes. /// This is useful to debug when integrating the engine with another service. - #[clap(long, env = "MEILI_ENV", default_value = "development", possible_values = &POSSIBLE_ENV)] + #[clap(long, env = MEILI_ENV, default_value = "development", possible_values = &POSSIBLE_ENV)] pub env: String, /// Do not send analytics to Meili. 
#[cfg(all(not(debug_assertions), feature = "analytics"))] - #[serde(skip)] // we can't send true - #[clap(long, env = "MEILI_NO_ANALYTICS")] + #[serde(skip_serializing)] // we can't send true + #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, /// The maximum size, in bytes, of the main lmdb database directory - #[clap(long, env = "MEILI_MAX_INDEX_SIZE", default_value = "100 GiB")] + #[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value = "100 GiB")] pub max_index_size: Byte, /// The maximum size, in bytes, of the update lmdb database directory - #[clap(long, env = "MEILI_MAX_TASK_DB_SIZE", default_value = "100 GiB")] + #[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value = "100 GiB")] pub max_task_db_size: Byte, /// The maximum size, in bytes, of accepted JSON payloads - #[clap(long, env = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT", default_value = "100 MB")] + #[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value = "100 MB")] pub http_payload_size_limit: Byte, /// Read server certificates from CERTFILE. /// This should contain PEM-format certificates /// in the right order (the first certificate should /// certify KEYFILE, the last should be a root CA). - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_CERT_PATH", parse(from_os_str))] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_CERT_PATH, parse(from_os_str))] pub ssl_cert_path: Option, /// Read private key from KEYFILE. This should be a RSA /// private key or PKCS8-encoded private key, in PEM format. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_KEY_PATH", parse(from_os_str))] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_KEY_PATH, parse(from_os_str))] pub ssl_key_path: Option, /// Enable client authentication, and accept certificates /// signed by those roots provided in CERTFILE. 
- #[clap(long, env = "MEILI_SSL_AUTH_PATH", parse(from_os_str))] - #[serde(skip)] + #[clap(long, env = MEILI_SSL_AUTH_PATH, parse(from_os_str))] + #[serde(skip_serializing)] pub ssl_auth_path: Option, /// Read DER-encoded OCSP response from OCSPFILE and staple to certificate. /// Optional - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_OCSP_PATH", parse(from_os_str))] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_OCSP_PATH, parse(from_os_str))] pub ssl_ocsp_path: Option, /// Send a fatal alert if the client does not complete client authentication. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_REQUIRE_AUTH")] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)] pub ssl_require_auth: bool, /// SSL support session resumption - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_RESUMPTION")] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_RESUMPTION)] pub ssl_resumption: bool, /// SSL support tickets. - #[serde(skip)] - #[clap(long, env = "MEILI_SSL_TICKETS")] + #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_TICKETS)] pub ssl_tickets: bool, /// Defines the path of the snapshot file to import. @@ -123,15 +150,15 @@ pub struct Opt { pub ignore_snapshot_if_db_exists: bool, /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. - #[clap(long, env = "MEILI_SNAPSHOT_DIR", default_value = "snapshots/")] + #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value = "snapshots/")] pub snapshot_dir: PathBuf, /// Activate snapshot scheduling. - #[clap(long, env = "MEILI_SCHEDULE_SNAPSHOT")] + #[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)] pub schedule_snapshot: bool, /// Defines time interval, in seconds, between each snapshot creation. 
- #[clap(long, env = "MEILI_SNAPSHOT_INTERVAL_SEC", default_value = "86400")] // 24h + #[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value = "86400")] // 24h pub snapshot_interval_sec: u64, /// Import a dump from the specified path, must be a `.dump` file. @@ -147,16 +174,16 @@ pub struct Opt { pub ignore_dump_if_db_exists: bool, /// Folder where dumps are created when the dump route is called. - #[clap(long, env = "MEILI_DUMPS_DIR", default_value = "dumps/")] + #[clap(long, env = MEILI_DUMPS_DIR, default_value = "dumps/")] pub dumps_dir: PathBuf, /// Set the log level - #[clap(long, env = "MEILI_LOG_LEVEL", default_value = "info")] + #[clap(long, env = MEILI_LOG_LEVEL, default_value = "info")] pub log_level: String, /// Enables Prometheus metrics and /metrics route. #[cfg(feature = "metrics")] - #[clap(long, env = "MEILI_ENABLE_METRICS_ROUTE")] + #[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)] pub enable_metrics_route: bool, #[serde(flatten)] @@ -175,6 +202,83 @@ impl Opt { !self.no_analytics } + pub fn build() -> Self { + let args = Command::new("config") + .arg( + Arg::new("config_file_path") + .long("config-file-path") + .takes_value(true) + .default_value("./config.toml") + .help("Path to a config file, must be TOML format"), + ) + .get_matches(); + let config_file_path = args + .value_of("config_file_path") + .expect("default value present"); + if let Some(Ok(opts_from_file)) = match std::fs::read_to_string(config_file_path) { + Ok(config_str) => Some(toml::from_str::(&config_str)), + Err(err) => { + log::debug!("can't read {} : {}", config_file_path, err); + None + } + } { + opts_from_file.export_to_env(); + } + + Opt::parse() + } + + fn export_to_env(self) { + export_to_env_if_not_present(MEILI_DB_PATH, self.db_path); + export_to_env_if_not_present(MEILI_HTTP_ADDR, self.http_addr); + if let Some(master_key) = self.master_key { + export_to_env_if_not_present(MEILI_MASTER_KEY, master_key); + } + export_to_env_if_not_present(MEILI_ENV, self.env); + 
#[cfg(all(not(debug_assertions), feature = "analytics"))] + { + export_to_env_if_not_present(MEILI_NO_ANALYTICS, self.no_analytics); + } + export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, self.max_index_size.to_string()); + export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, self.max_task_db_size.to_string()); + export_to_env_if_not_present( + MEILI_HTTP_PAYLOAD_SIZE_LIMIT, + self.http_payload_size_limit.to_string(), + ); + if let Some(ssl_cert_path) = self.ssl_cert_path { + export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path); + } + if let Some(ssl_key_path) = self.ssl_key_path { + export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path); + } + if let Some(ssl_auth_path) = self.ssl_auth_path { + export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path); + } + if let Some(ssl_ocsp_path) = self.ssl_ocsp_path { + export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path); + } + export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, self.ssl_require_auth.to_string()); + export_to_env_if_not_present(MEILI_SSL_RESUMPTION, self.ssl_resumption.to_string()); + export_to_env_if_not_present(MEILI_SSL_TICKETS, self.ssl_tickets.to_string()); + export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, self.snapshot_dir); + export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, self.schedule_snapshot.to_string()); + export_to_env_if_not_present( + MEILI_SNAPSHOT_INTERVAL_SEC, + self.snapshot_interval_sec.to_string(), + ); + export_to_env_if_not_present(MEILI_DUMPS_DIR, self.dumps_dir); + export_to_env_if_not_present(MEILI_LOG_LEVEL, self.log_level); + #[cfg(feature = "metrics")] + { + export_to_env_if_not_present( + MEILI_ENABLE_METRICS_ROUTE, + self.enable_metrics_route.to_string(), + ); + } + self.indexer_options.export_to_env(); + self.scheduler_options.export_to_env(); + } + pub fn get_ssl_config(&self) -> anyhow::Result> { if let (Some(cert_path), Some(key_path)) = (&self.ssl_cert_path, &self.ssl_key_path) { let config = 
rustls::ServerConfig::builder().with_safe_defaults(); diff --git a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs index 70fd2ba51..7fe0984dc 100644 --- a/meilisearch-lib/src/lib.rs +++ b/meilisearch-lib/src/lib.rs @@ -11,6 +11,8 @@ mod snapshot; pub mod tasks; mod update_file_store; +use std::env::VarError; +use std::ffi::OsStr; use std::path::Path; pub use index_controller::MeiliSearch; @@ -35,3 +37,14 @@ pub fn is_empty_db(db_path: impl AsRef) -> bool { true } } + +/// Checks if the key is defined in the environment variables. +/// If not, inserts it with the given value. +pub fn export_to_env_if_not_present(key: &str, value: T) +where + T: AsRef, +{ + if let Err(VarError::NotPresent) = std::env::var(key) { + std::env::set_var(key, value); + } +} diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index ea810b9b7..0b9254848 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -1,22 +1,28 @@ +use crate::export_to_env_if_not_present; + use core::fmt; use std::{convert::TryFrom, num::ParseIntError, ops::Deref, str::FromStr}; use byte_unit::{Byte, ByteError}; use clap::Parser; use milli::update::IndexerConfig; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use sysinfo::{RefreshKind, System, SystemExt}; -#[derive(Debug, Clone, Parser, Serialize)] +const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; +const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; +const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; + +#[derive(Debug, Clone, Parser, Serialize, Deserialize)] pub struct IndexerOpts { /// The amount of documents to skip before printing /// a log regarding the indexing advancement. - #[serde(skip)] + #[serde(skip_serializing)] #[clap(long, default_value = "100000", hide = true)] // 100k pub log_every_n: usize, /// Grenad max number of chunks in bytes. 
- #[serde(skip)] + #[serde(skip_serializing)] #[clap(long, hide = true)] pub max_nb_chunks: Option, @@ -27,7 +33,7 @@ pub struct IndexerOpts { /// In case the engine is unable to retrieve the available memory the engine will /// try to use the memory it needs but without real limit, this can lead to /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. - #[clap(long, env = "MEILI_MAX_INDEXING_MEMORY", default_value_t)] + #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] pub max_indexing_memory: MaxMemory, /// The maximum number of threads the indexer will use. @@ -35,18 +41,33 @@ pub struct IndexerOpts { /// it will use the maximum number of available cores. /// /// It defaults to half of the available threads. - #[clap(long, env = "MEILI_MAX_INDEXING_THREADS", default_value_t)] + #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)] pub max_indexing_threads: MaxThreads, } -#[derive(Debug, Clone, Parser, Default, Serialize)] +#[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)] pub struct SchedulerConfig { /// The engine will disable task auto-batching, /// and will sequencialy compute each task one by one. 
- #[clap(long, env = "DISABLE_AUTO_BATCHING")] + #[clap(long, env = DISABLE_AUTO_BATCHING)] pub disable_auto_batching: bool, } +impl IndexerOpts { + pub fn export_to_env(self) { + if let Some(max_indexing_memory) = self.max_indexing_memory.0 { + export_to_env_if_not_present( + MEILI_MAX_INDEXING_MEMORY, + max_indexing_memory.to_string(), + ); + } + export_to_env_if_not_present( + MEILI_MAX_INDEXING_THREADS, + self.max_indexing_threads.0.to_string(), + ); + } +} + impl TryFrom<&IndexerOpts> for IndexerConfig { type Error = anyhow::Error; @@ -77,8 +98,17 @@ impl Default for IndexerOpts { } } +impl SchedulerConfig { + pub fn export_to_env(self) { + export_to_env_if_not_present( + DISABLE_AUTO_BATCHING, + self.disable_auto_batching.to_string(), + ); + } +} + /// A type used to detect the max memory available and use 2/3 of it. -#[derive(Debug, Clone, Copy, Serialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct MaxMemory(Option); impl FromStr for MaxMemory { @@ -134,7 +164,7 @@ fn total_memory_bytes() -> Option { } } -#[derive(Debug, Clone, Copy, Serialize)] +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] pub struct MaxThreads(usize); impl FromStr for MaxThreads { From 6520d3c4741f93bb38b0ecebdf8325eb335812b2 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Tue, 6 Sep 2022 14:50:49 +0200 Subject: [PATCH 17/44] Refactor build method and flag --- meilisearch-http/src/option.rs | 40 ++++++++++++++++------------------ 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 143dfb231..9c1fc5732 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -4,7 +4,7 @@ use std::path::PathBuf; use std::sync::Arc; use byte_unit::Byte; -use clap::{Arg, Command, Parser}; +use clap::Parser; use meilisearch_lib::{ export_to_env_if_not_present, options::{IndexerOpts, SchedulerConfig}, @@ -193,6 +193,12 @@ pub struct Opt { #[serde(flatten)] #[clap(flatten)] 
pub scheduler_options: SchedulerConfig, + + /// The path to a configuration file that should be used to setup the engine. + /// Format must be TOML. + #[serde(skip_serializing)] + #[clap(long)] + config_file_path: Option, } impl Opt { @@ -203,29 +209,21 @@ impl Opt { } pub fn build() -> Self { - let args = Command::new("config") - .arg( - Arg::new("config_file_path") - .long("config-file-path") - .takes_value(true) - .default_value("./config.toml") - .help("Path to a config file, must be TOML format"), - ) - .get_matches(); - let config_file_path = args - .value_of("config_file_path") - .expect("default value present"); - if let Some(Ok(opts_from_file)) = match std::fs::read_to_string(config_file_path) { - Ok(config_str) => Some(toml::from_str::(&config_str)), - Err(err) => { - log::debug!("can't read {} : {}", config_file_path, err); - None + let mut opts = Opt::parse(); + if let Some(config_file_path) = opts.config_file_path.as_ref() { + eprintln!("loading config file : {:?}", config_file_path); + match std::fs::read(config_file_path) { + Ok(config) => { + let opt_from_config = + toml::from_slice::(&config).expect("can't read file"); + opt_from_config.export_to_env(); + opts = Opt::parse(); + } + Err(err) => eprintln!("can't read {:?} : {}", config_file_path, err), } - } { - opts_from_file.export_to_env(); } - Opt::parse() + opts } fn export_to_env(self) { From ef3fa925367d6c116ab8e6bdf3293255cf166f03 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Wed, 7 Sep 2022 11:51:23 +0200 Subject: [PATCH 18/44] Refactor default values for clap and serde --- meilisearch-http/src/option.rs | 98 ++++++++++++++++++++++++++++------ meilisearch-lib/src/options.rs | 13 ++++- 2 files changed, 94 insertions(+), 17 deletions(-) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 9c1fc5732..28ea39162 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -45,15 +45,28 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; 
#[cfg(feature = "metrics")] const MEILI_ENABLE_METRICS_ROUTE: &str = "MEILI_ENABLE_METRICS_ROUTE"; +const DEFAULT_DB_PATH: &str = "./data.ms"; +const DEFAULT_HTTP_ADDR: &str = "127.0.0.1:7700"; +const DEFAULT_ENV: &str = "development"; +const DEFAULT_MAX_INDEX_SIZE: &str = "100 GiB"; +const DEFAULT_MAX_TASK_DB_SIZE: &str = "100 GiB"; +const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB"; +const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/"; +const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400; +const DEFAULT_DUMPS_DIR: &str = "dumps/"; +const DEFAULT_LOG_LEVEL: &str = "info"; + #[derive(Debug, Clone, Parser, Serialize, Deserialize)] #[clap(version)] pub struct Opt { /// The destination where the database must be created. - #[clap(long, env = MEILI_DB_PATH, default_value = "./data.ms")] + #[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())] + #[serde(default = "default_db_path")] pub db_path: PathBuf, /// The address on which the http server will listen. - #[clap(long, env = MEILI_HTTP_ADDR, default_value = "127.0.0.1:7700")] + #[clap(long, env = MEILI_HTTP_ADDR, default_value_t = default_http_addr())] + #[serde(default = "default_http_addr")] pub http_addr: String, /// The master key allowing you to do everything on the server. @@ -65,25 +78,29 @@ pub struct Opt { /// If the server is running in development mode more logs will be displayed, /// and the master key can be avoided which implies that there is no security on the updates routes. /// This is useful to debug when integrating the engine with another service. - #[clap(long, env = MEILI_ENV, default_value = "development", possible_values = &POSSIBLE_ENV)] + #[clap(long, env = MEILI_ENV, default_value_t = default_env(), possible_values = &POSSIBLE_ENV)] + #[serde(default = "default_env")] pub env: String, /// Do not send analytics to Meili. 
#[cfg(all(not(debug_assertions), feature = "analytics"))] - #[serde(skip_serializing)] // we can't send true + #[serde(skip_serializing, default)] // we can't send true #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, /// The maximum size, in bytes, of the main lmdb database directory - #[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value = "100 GiB")] + #[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())] + #[serde(default = "default_max_index_size")] pub max_index_size: Byte, /// The maximum size, in bytes, of the update lmdb database directory - #[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value = "100 GiB")] + #[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())] + #[serde(default = "default_max_task_db_size")] pub max_task_db_size: Byte, /// The maximum size, in bytes, of accepted JSON payloads - #[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value = "100 MB")] + #[clap(long, env = MEILI_HTTP_PAYLOAD_SIZE_LIMIT, default_value_t = default_http_payload_size_limit())] + #[serde(default = "default_http_payload_size_limit")] pub http_payload_size_limit: Byte, /// Read server certificates from CERTFILE. @@ -113,17 +130,17 @@ pub struct Opt { pub ssl_ocsp_path: Option, /// Send a fatal alert if the client does not complete client authentication. - #[serde(skip_serializing)] + #[serde(skip_serializing, default)] #[clap(long, env = MEILI_SSL_REQUIRE_AUTH)] pub ssl_require_auth: bool, /// SSL support session resumption - #[serde(skip_serializing)] + #[serde(skip_serializing, default)] #[clap(long, env = MEILI_SSL_RESUMPTION)] pub ssl_resumption: bool, /// SSL support tickets. 
- #[serde(skip_serializing)] + #[serde(skip_serializing, default)] #[clap(long, env = MEILI_SSL_TICKETS)] pub ssl_tickets: bool, @@ -139,6 +156,7 @@ pub struct Opt { env = "MEILI_IGNORE_MISSING_SNAPSHOT", requires = "import-snapshot" )] + #[serde(default)] pub ignore_missing_snapshot: bool, /// The engine will skip snapshot importation and not return an error in such case. @@ -147,18 +165,23 @@ pub struct Opt { env = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS", requires = "import-snapshot" )] + #[serde(default)] pub ignore_snapshot_if_db_exists: bool, /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. - #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value = "snapshots/")] + #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())] + #[serde(default = "default_snapshot_dir")] pub snapshot_dir: PathBuf, /// Activate snapshot scheduling. #[clap(long, env = MEILI_SCHEDULE_SNAPSHOT)] + #[serde(default)] pub schedule_snapshot: bool, /// Defines time interval, in seconds, between each snapshot creation. - #[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value = "86400")] // 24h + #[clap(long, env = MEILI_SNAPSHOT_INTERVAL_SEC, default_value_t = default_snapshot_interval_sec())] + #[serde(default = "default_snapshot_interval_sec")] + // 24h pub snapshot_interval_sec: u64, /// Import a dump from the specified path, must be a `.dump` file. @@ -167,23 +190,28 @@ pub struct Opt { /// If the dump doesn't exists, load or create the database specified by `db-path` instead. #[clap(long, env = "MEILI_IGNORE_MISSING_DUMP", requires = "import-dump")] + #[serde(default)] pub ignore_missing_dump: bool, /// Ignore the dump if a database already exists, and load that database instead. #[clap(long, env = "MEILI_IGNORE_DUMP_IF_DB_EXISTS", requires = "import-dump")] + #[serde(default)] pub ignore_dump_if_db_exists: bool, /// Folder where dumps are created when the dump route is called. 
- #[clap(long, env = MEILI_DUMPS_DIR, default_value = "dumps/")] + #[clap(long, env = MEILI_DUMPS_DIR, default_value_os_t = default_dumps_dir())] + #[serde(default = "default_dumps_dir")] pub dumps_dir: PathBuf, /// Set the log level - #[clap(long, env = MEILI_LOG_LEVEL, default_value = "info")] + #[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())] + #[serde(default = "default_log_level")] pub log_level: String, /// Enables Prometheus metrics and /metrics route. #[cfg(feature = "metrics")] #[clap(long, env = MEILI_ENABLE_METRICS_ROUTE)] + #[serde(default)] pub enable_metrics_route: bool, #[serde(flatten)] @@ -235,7 +263,7 @@ impl Opt { export_to_env_if_not_present(MEILI_ENV, self.env); #[cfg(all(not(debug_assertions), feature = "analytics"))] { - export_to_env_if_not_present(MEILI_NO_ANALYTICS, self.no_analytics); + export_to_env_if_not_present(MEILI_NO_ANALYTICS, self.no_analytics.to_string()); } export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, self.max_index_size.to_string()); export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, self.max_task_db_size.to_string()); @@ -375,6 +403,46 @@ fn load_ocsp(filename: &Option) -> anyhow::Result> { Ok(ret) } +fn default_db_path() -> PathBuf { + PathBuf::from(DEFAULT_DB_PATH) +} + +fn default_http_addr() -> String { + DEFAULT_HTTP_ADDR.to_string() +} + +fn default_env() -> String { + DEFAULT_ENV.to_string() +} + +fn default_max_index_size() -> Byte { + Byte::from_str(DEFAULT_MAX_INDEX_SIZE).unwrap() +} + +fn default_max_task_db_size() -> Byte { + Byte::from_str(DEFAULT_MAX_TASK_DB_SIZE).unwrap() +} + +fn default_http_payload_size_limit() -> Byte { + Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap() +} + +fn default_snapshot_dir() -> PathBuf { + PathBuf::from(DEFAULT_SNAPSHOT_DIR) +} + +fn default_snapshot_interval_sec() -> u64 { + DEFAULT_SNAPSHOT_INTERVAL_SEC +} + +fn default_dumps_dir() -> PathBuf { + PathBuf::from(DEFAULT_DUMPS_DIR) +} + +fn default_log_level() -> String { + 
DEFAULT_LOG_LEVEL.to_string() +} + #[cfg(test)] mod test { use super::*; diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index 0b9254848..4e208187d 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -13,12 +13,14 @@ const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; +const DEFAULT_LOG_EVERY_N: usize = 100000; + #[derive(Debug, Clone, Parser, Serialize, Deserialize)] pub struct IndexerOpts { /// The amount of documents to skip before printing /// a log regarding the indexing advancement. - #[serde(skip_serializing)] - #[clap(long, default_value = "100000", hide = true)] // 100k + #[serde(skip_serializing, default = "default_log_every_n")] + #[clap(long, default_value_t = default_log_every_n(), hide = true)] // 100k pub log_every_n: usize, /// Grenad max number of chunks in bytes. @@ -34,6 +36,7 @@ pub struct IndexerOpts { /// try to use the memory it needs but without real limit, this can lead to /// Out-Of-Memory issues and it is recommended to specify the amount of memory to use. #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] + #[serde(default)] pub max_indexing_memory: MaxMemory, /// The maximum number of threads the indexer will use. @@ -42,6 +45,7 @@ pub struct IndexerOpts { /// /// It defaults to half of the available threads. #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)] + #[serde(default)] pub max_indexing_threads: MaxThreads, } @@ -50,6 +54,7 @@ pub struct SchedulerConfig { /// The engine will disable task auto-batching, /// and will sequencialy compute each task one by one. 
#[clap(long, env = DISABLE_AUTO_BATCHING)] + #[serde(default)] pub disable_auto_batching: bool, } @@ -194,3 +199,7 @@ impl Deref for MaxThreads { &self.0 } } + +fn default_log_every_n() -> usize { + DEFAULT_LOG_EVERY_N +} From 135499f398e34be3c970c7da6165d0533c0ed759 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Wed, 7 Sep 2022 17:47:15 +0200 Subject: [PATCH 19/44] Extract new env vars to const --- meilisearch-http/src/option.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 28ea39162..c16856b58 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -37,9 +37,15 @@ const MEILI_SSL_OCSP_PATH: &str = "MEILI_SSL_OCSP_PATH"; const MEILI_SSL_REQUIRE_AUTH: &str = "MEILI_SSL_REQUIRE_AUTH"; const MEILI_SSL_RESUMPTION: &str = "MEILI_SSL_RESUMPTION"; const MEILI_SSL_TICKETS: &str = "MEILI_SSL_TICKETS"; +const MEILI_IMPORT_SNAPSHOT: &str = "MEILI_IMPORT_SNAPSHOT"; +const MEILI_IGNORE_MISSING_SNAPSHOT: &str = "MEILI_IGNORE_MISSING_SNAPSHOT"; +const MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS: &str = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS"; const MEILI_SNAPSHOT_DIR: &str = "MEILI_SNAPSHOT_DIR"; const MEILI_SCHEDULE_SNAPSHOT: &str = "MEILI_SCHEDULE_SNAPSHOT"; const MEILI_SNAPSHOT_INTERVAL_SEC: &str = "MEILI_SNAPSHOT_INTERVAL_SEC"; +const MEILI_IMPORT_DUMP: &str = "MEILI_IMPORT_DUMP"; +const MEILI_IGNORE_MISSING_DUMP: &str = "MEILI_IGNORE_MISSING_DUMP"; +const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS"; const MEILI_DUMPS_DIR: &str = "MEILI_DUMPS_DIR"; const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; #[cfg(feature = "metrics")] @@ -147,13 +153,13 @@ pub struct Opt { /// Defines the path of the snapshot file to import. /// This option will, by default, stop the process if a database already exist or if no snapshot exists at /// the given path. If this option is not specified no snapshot is imported. 
- #[clap(long, env = "MEILI_IMPORT_SNAPSHOT")] + #[clap(long, env = MEILI_IMPORT_SNAPSHOT)] pub import_snapshot: Option, /// The engine will ignore a missing snapshot and not return an error in such case. #[clap( long, - env = "MEILI_IGNORE_MISSING_SNAPSHOT", + env = MEILI_IGNORE_MISSING_SNAPSHOT, requires = "import-snapshot" )] #[serde(default)] @@ -162,7 +168,7 @@ pub struct Opt { /// The engine will skip snapshot importation and not return an error in such case. #[clap( long, - env = "MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS", + env = MEILI_IGNORE_SNAPSHOT_IF_DB_EXISTS, requires = "import-snapshot" )] #[serde(default)] @@ -185,16 +191,16 @@ pub struct Opt { pub snapshot_interval_sec: u64, /// Import a dump from the specified path, must be a `.dump` file. - #[clap(long, env = "MEILI_IMPORT_DUMP", conflicts_with = "import-snapshot")] + #[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import-snapshot")] pub import_dump: Option, /// If the dump doesn't exists, load or create the database specified by `db-path` instead. - #[clap(long, env = "MEILI_IGNORE_MISSING_DUMP", requires = "import-dump")] + #[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import-dump")] #[serde(default)] pub ignore_missing_dump: bool, /// Ignore the dump if a database already exists, and load that database instead. 
- #[clap(long, env = "MEILI_IGNORE_DUMP_IF_DB_EXISTS", requires = "import-dump")] + #[clap(long, env = MEILI_IGNORE_DUMP_IF_DB_EXISTS, requires = "import-dump")] #[serde(default)] pub ignore_dump_if_db_exists: bool, From d1a30df23da0368b2a5f9695230eabc25c95db14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Wi=C5=9Bniewski?= Date: Wed, 7 Sep 2022 18:05:55 +0200 Subject: [PATCH 20/44] Remove unneeded prints, format --- meilisearch-lib/src/document_formats.rs | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index a0b3c0552..83e5b9fdb 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -47,7 +47,8 @@ impl Display for DocumentFormatError { let trim_input_prefix_len = 50; let trim_input_suffix_len = 85; - if serde_msg.len() > trim_input_prefix_len + trim_input_suffix_len + ellipsis.len() + if serde_msg.len() + > trim_input_prefix_len + trim_input_suffix_len + ellipsis.len() { serde_msg.replace_range( trim_input_prefix_len..serde_msg.len() - trim_input_suffix_len, @@ -143,14 +144,9 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { let content: ArrayOrSingleObject = serde_json::from_reader(reader) .map_err(Error::Json) - .map_err(|e| { - println!("Błąd o taki: {:#?}", e); - (PayloadType::Json, e) - })?; + .map_err(|e| (PayloadType::Json, e))?; - println!("content o taki: {:#?}", content); for object in content.inner.map_right(|o| vec![o]).into_inner() { - println!("{:#?}", object); builder .append_json_object(&object) .map_err(Into::into) @@ -158,8 +154,6 @@ pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result { } let count = builder.documents_count(); - println!("{count}"); - let _ = builder .into_inner() .map_err(Into::into) From 5a4f1508d0a09e8a7e5ad123deb8c96bf9f9e413 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Wed, 7 Sep 2022 18:16:33 +0200 Subject: [PATCH 
21/44] Add documentation --- meilisearch-http/src/option.rs | 8 ++++++++ meilisearch-lib/src/options.rs | 1 + 2 files changed, 9 insertions(+) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index c16856b58..3351e8b92 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -242,15 +242,20 @@ impl Opt { !self.no_analytics } + /// Build a new Opt from config file, env vars and cli args. pub fn build() -> Self { + // Parse the args to get the config_file_path. let mut opts = Opt::parse(); if let Some(config_file_path) = opts.config_file_path.as_ref() { eprintln!("loading config file : {:?}", config_file_path); match std::fs::read(config_file_path) { Ok(config) => { + // If the arg is present, and the file successfully read, we deserialize it with `toml`. let opt_from_config = toml::from_slice::(&config).expect("can't read file"); + // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. opt_from_config.export_to_env(); + // Once injected we parse the cli args once again to take the new env vars into scope. opts = Opt::parse(); } Err(err) => eprintln!("can't read {:?} : {}", config_file_path, err), @@ -260,6 +265,7 @@ impl Opt { opts } + /// Exports the opts values to their corresponding env vars if they are not set. fn export_to_env(self) { export_to_env_if_not_present(MEILI_DB_PATH, self.db_path); export_to_env_if_not_present(MEILI_HTTP_ADDR, self.http_addr); @@ -409,6 +415,8 @@ fn load_ocsp(filename: &Option) -> anyhow::Result> { Ok(ret) } +/// Functions used to get default value for `Opt` fields, needs to be function because of serde's default attribute. 
+ fn default_db_path() -> PathBuf { PathBuf::from(DEFAULT_DB_PATH) } diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index 4e208187d..5aa7edf37 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -59,6 +59,7 @@ pub struct SchedulerConfig { } impl IndexerOpts { + /// Exports the values to their corresponding env vars if they are not set. pub fn export_to_env(self) { if let Some(max_indexing_memory) = self.max_indexing_memory.0 { export_to_env_if_not_present( From 7f267ec4be1901102be5154222545500ff34fb2a Mon Sep 17 00:00:00 2001 From: mlemesle Date: Wed, 7 Sep 2022 20:22:49 +0200 Subject: [PATCH 22/44] Fix clippy --- meilisearch-http/src/option.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 3351e8b92..11a396904 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -232,7 +232,7 @@ pub struct Opt { /// Format must be TOML. 
#[serde(skip_serializing)] #[clap(long)] - config_file_path: Option, + pub config_file_path: Option, } impl Opt { From 579fa3f1add4730cad01c6c8e9b69e8f1c82d2d1 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Thu, 8 Sep 2022 11:05:52 +0200 Subject: [PATCH 23/44] Remove unnecessary println --- meilisearch-http/src/main.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index e74c1e056..147f526a2 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -30,8 +30,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> { let opt = Opt::build(); - println!("{:?}", opt); - setup(&opt)?; match opt.env.as_ref() { From a690ace36e31c27a36a02e472fa95a4aee540f3d Mon Sep 17 00:00:00 2001 From: mlemesle Date: Fri, 9 Sep 2022 09:37:23 +0200 Subject: [PATCH 24/44] Add example config.toml with default values --- config.toml | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 config.toml diff --git a/config.toml b/config.toml new file mode 100644 index 000000000..dcfaf835d --- /dev/null +++ b/config.toml @@ -0,0 +1,121 @@ +# This file shows the default configuration of Meilisearch. 
+# All variables are defined here https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables + + +### DUMP + +# Folder where dumps are created when the dump route is called +# dumps-dir = "dumps/" + +# Ignore the dump if a database already exists, and load that database instead +# ignore-dump-if-db-exists = false + +# If the dump doesn't exists, load or create the database specified by `db-path` instead +# ignore-missing-dump = false + +# Import a dump from the specified path, must be a `.dump` file +# import-dump = "./path/to/my/file.dump" + + +### SNAPSHOT + +# The engine will ignore a missing snapshot and not return an error in such case +# ignore-missing-snapshot = false + +# The engine will skip snapshot importation and not return an error in such case +# ignore-snapshot-if-db-exists = false + +# Defines the path of the snapshot file to import. This option will, by default, stop the +# process if a database already exist or if no snapshot exists at the given path. If this +# option is not specified no snapshot is imported +# import-snapshot = false + +# Activate snapshot scheduling +# schedule-snapshot = false + +# Defines the directory path where meilisearch will create snapshot each snapshot_time_gap +# snapshot-dir = "snapshots/" + +# Defines time interval, in seconds, between each snapshot creation +# snapshot-interval-sec = 86400 + + +### INDEX + +# The maximum size, in bytes, of the main lmdb database directory +# max-index-size = "100 GiB" + +# The maximum amount of memory the indexer will use. It defaults to 2/3 of the available +# memory. It is recommended to use something like 80%-90% of the available memory, no +# more. +# +# In case the engine is unable to retrieve the available memory the engine will try to use +# the memory it needs but without real limit, this can lead to Out-Of-Memory issues and it +# is recommended to specify the amount of memory to use. 
+# +# /!\ The default value is system dependant /!\ +# max-indexing-memory = "2 GiB" + +# The maximum number of threads the indexer will use. If the number set is higher than the +# real number of cores available in the machine, it will use the maximum number of +# available cores. +# +# It defaults to half of the available threads. +# max-indexing-threads = 4 + + +### SSL + +# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE +# ssl-auth-path = "./path/to/root" + +# Read server certificates from CERTFILE. This should contain PEM-format certificates in +# the right order (the first certificate should certify KEYFILE, the last should be a root +# CA) +# ssl-cert-path = "./path/to/CERTFILE" + +# Read private key from KEYFILE. This should be a RSA private key or PKCS8-encoded +# private key, in PEM format +# ssl-key-path = "./path/to/private-key" + +# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional +# ssl-ocsp-path = "./path/to/OCSPFILE" + +# Send a fatal alert if the client does not complete client authentication +# ssl-require-auth = false + +# SSL support session resumption +# ssl-resumption = false + +# SSL support tickets +# ssl-tickets = false + + +### MISC + +# This environment variable must be set to `production` if you are running in production. +# If the server is running in development mode more logs will be displayed, and the master +# key can be avoided which implies that there is no security on the updates routes. 
This +# is useful to debug when integrating the engine with another service +# env = "development" # possible values: [development, production] + +# The address on which the http server will listen +# http-addr = "127.0.0.1:7700" + +# The maximum size, in bytes, of accepted JSON payloads +# http-payload-size-limit = 100000000 + +# The destination where the database must be created +# db-path = "./data.ms" + +# The engine will disable task auto-batching, and will sequencialy compute each task one by one +# disable-auto-batching = false + +# Set the log level +# log-level = "info" + +# The master key allowing you to do everything on the server +# master-key = "YOUR MASTER KEY" + +# The maximum size, in bytes, of the update lmdb database directory +# max-task-db-size = "100 GiB" From c2ab7a793918f5d538ab1e6006f9793003ece5ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar=20-=20curqui?= Date: Wed, 14 Sep 2022 14:40:36 +0200 Subject: [PATCH 25/44] Update config.yml --- .github/ISSUE_TEMPLATE/config.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 1006a064d..3f6cb9462 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,13 +1,13 @@ contact_links: + - name: Support questions & other + url: https://github.com/meilisearch/meilisearch/discussions/new + about: For any other question, open a discussion in this repository - name: Language support request & feedback url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal?discussions_q=label%3Aproduct%3Acore%3Atokenizer+category%3A%22Feedback+%26+Feature+Proposal%22 about: The requests and feedback regarding Language support are not managed in this repository. Please upvote the related discussion in our dedicated product repository or open a new one if it doesn't exist. 
- - name: Feature request & feedback + - name: Any other feature request & feedback url: https://github.com/meilisearch/product/discussions/categories/feedback-feature-proposal about: The feature requests and feedback regarding the already existing features are not managed in this repository. Please open a discussion in our dedicated product repository - name: Documentation issue url: https://github.com/meilisearch/documentation/issues/new about: For documentation issues, open an issue or a PR in the documentation repository - - name: Support questions & other - url: https://github.com/meilisearch/meilisearch/discussions/new - about: For any other question, open a discussion in this repository From 5b571147718d37f31ccc86e88f90d3d1894ece1b Mon Sep 17 00:00:00 2001 From: Jakub Jirutka Date: Wed, 14 Sep 2022 20:52:11 +0200 Subject: [PATCH 26/44] Bump milli from 0.33.0 to 0.33.4 --- Cargo.lock | 16 ++++++++-------- meilisearch-auth/Cargo.toml | 2 +- meilisearch-lib/Cargo.toml | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index babbd0ab2..7a0802e3e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1119,8 +1119,8 @@ dependencies = [ [[package]] name = "filter-parser" -version = "0.33.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5" +version = "0.33.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" dependencies = [ "nom", "nom_locate", @@ -1144,8 +1144,8 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "0.33.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5" +version = "0.33.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" dependencies = [ "serde_json", ] @@ -1657,8 +1657,8 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "0.33.0" 
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5" +version = "0.33.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" dependencies = [ "serde_json", ] @@ -2195,8 +2195,8 @@ dependencies = [ [[package]] name = "milli" -version = "0.33.0" -source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.0#a79ff8a1a98a807f40f970131c8de2ab11560de5" +version = "0.33.4" +source = "git+https://github.com/meilisearch/milli.git?tag=v0.33.4#4fc6331cb6526c07f3137584564cfe3493fb25bd" dependencies = [ "bimap", "bincode", diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index 4504180b4..470d5b8d1 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" enum-iterator = "0.7.0" hmac = "0.12.1" meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } rand = "0.8.4" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.85", features = ["preserve_order"] } diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index 74c46979e..de967286c 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -28,7 +28,7 @@ lazy_static = "1.4.0" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.0" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" From 935f18efcfbad0196eb615bebfe7a71b7ea76bd4 Mon Sep 17 00:00:00 2001 From: Jakub Jirutka Date: Wed, 14 Sep 2022 20:57:13 +0200 Subject: [PATCH 27/44] Allow building without specialized tokenizations (Some of) these specialized 
tokenizations include huge dictionaries that currently account for 90% (!) of the meilisearch binary size. This commit adds chinese, hebrew, japanese, and thai feature flags that are propagated via milli down to the charabia crate. To keep it backward compatible, they are enabled by default. Related to meilisearch/milli#632 --- meilisearch-auth/Cargo.toml | 2 +- meilisearch-http/Cargo.toml | 8 ++++++-- meilisearch-lib/Cargo.toml | 18 +++++++++++++++++- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/meilisearch-auth/Cargo.toml b/meilisearch-auth/Cargo.toml index 470d5b8d1..3bbc09c4a 100644 --- a/meilisearch-auth/Cargo.toml +++ b/meilisearch-auth/Cargo.toml @@ -7,7 +7,7 @@ edition = "2021" enum-iterator = "0.7.0" hmac = "0.12.1" meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false } rand = "0.8.4" serde = { version = "1.0.136", features = ["derive"] } serde_json = { version = "1.0.85", features = ["preserve_order"] } diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 38f9a83fc..baea8b578 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -46,7 +46,7 @@ jsonwebtoken = "8.0.1" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -meilisearch-lib = { path = "../meilisearch-lib" } +meilisearch-lib = { path = "../meilisearch-lib", default-features = false } mimalloc = { version = "0.1.29", default-features = false } mime = "0.3.16" num_cpus = "1.13.1" @@ -90,7 +90,7 @@ urlencoding = "2.1.0" yaup = "0.2.0" [features] -default = ["analytics", "mini-dashboard"] +default = ["analytics", "meilisearch-lib/default", "mini-dashboard"] metrics = ["prometheus"] analytics = ["segment"] mini-dashboard = [ @@ -104,6 +104,10 @@ mini-dashboard = [ "tempfile", "zip", ] 
+chinese = ["meilisearch-lib/chinese"] +hebrew = ["meilisearch-lib/hebrew"] +japanese = ["meilisearch-lib/japanese"] +thai = ["meilisearch-lib/thai"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.1/build.zip" diff --git a/meilisearch-lib/Cargo.toml b/meilisearch-lib/Cargo.toml index de967286c..bda3ecbc7 100644 --- a/meilisearch-lib/Cargo.toml +++ b/meilisearch-lib/Cargo.toml @@ -28,7 +28,7 @@ lazy_static = "1.4.0" log = "0.4.14" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } -milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" } +milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false } mime = "0.3.16" num_cpus = "1.13.1" obkv = "0.2.0" @@ -64,3 +64,19 @@ nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f1388554 paste = "1.0.6" proptest = "1.0.0" proptest-derive = "0.3.0" + +[features] +# all specialized tokenizations +default = ["milli/default"] + +# chinese specialized tokenization +chinese = ["milli/chinese"] + +# hebrew specialized tokenization +hebrew = ["milli/hebrew"] + +# japanese specialized tokenization +japanese = ["milli/japanese"] + +# thai specialized tokenization +thai = ["milli/thai"] From 4dfae444780981550231404a19cd81eb117a0196 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Mon, 19 Sep 2022 18:16:28 +0200 Subject: [PATCH 28/44] Apply PR review comments --- config.toml | 156 ++++++++++++++++++--------------- meilisearch-http/src/main.rs | 13 ++- meilisearch-http/src/option.rs | 145 +++++++++++++++++++----------- meilisearch-lib/src/options.rs | 21 +++-- 4 files changed, 203 insertions(+), 132 deletions(-) diff --git a/config.toml b/config.toml index dcfaf835d..5d5ae4507 100644 --- a/config.toml +++ b/config.toml @@ -1,50 +1,55 @@ # This file shows the default configuration of Meilisearch. 
# All variables are defined here https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables +# db_path = "./data.ms" +# The destination where the database must be created. + +# env = "development" # Possible values: [development, production] +# This environment variable must be set to `production` if you are running in production. +# More logs will be displayed if the server is running in development mode. Setting the master +# key is optional; hence no security on the updates routes. This +# is useful to debug when integrating the engine with another service. + +# http_addr = "127.0.0.1:7700" +# The address on which the HTTP server will listen. + +# master-key = "MASTER_KEY" +# Sets the instance's master key, automatically protecting all routes except GET /health. + +# no_analytics = false +# Do not send analytics to Meilisearch. + +# disable-auto-batching = false +# The engine will disable task auto-batching, and will sequentially compute each task one by one. + ### DUMP -# Folder where dumps are created when the dump route is called # dumps-dir = "dumps/" +# Folder where dumps are created when the dump route is called. -# Ignore the dump if a database already exists, and load that database instead -# ignore-dump-if-db-exists = false - -# If the dump doesn't exists, load or create the database specified by `db-path` instead -# ignore-missing-dump = false - -# Import a dump from the specified path, must be a `.dump` file # import-dump = "./path/to/my/file.dump" +# Import a dump from the specified path, must be a `.dump` file. + +# ignore-missing-dump = false +# If the dump doesn't exist, load or create the database specified by `db-path` instead. + +# ignore-dump-if-db-exists = false +# Ignore the dump if a database already exists, and load that database instead. 
+ +### -### SNAPSHOT - -# The engine will ignore a missing snapshot and not return an error in such case -# ignore-missing-snapshot = false - -# The engine will skip snapshot importation and not return an error in such case -# ignore-snapshot-if-db-exists = false - -# Defines the path of the snapshot file to import. This option will, by default, stop the -# process if a database already exist or if no snapshot exists at the given path. If this -# option is not specified no snapshot is imported -# import-snapshot = false - -# Activate snapshot scheduling -# schedule-snapshot = false - -# Defines the directory path where meilisearch will create snapshot each snapshot_time_gap -# snapshot-dir = "snapshots/" - -# Defines time interval, in seconds, between each snapshot creation -# snapshot-interval-sec = 86400 +# log-level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] +# Set the log level. ### INDEX -# The maximum size, in bytes, of the main lmdb database directory # max-index-size = "100 GiB" +# The maximum size, in bytes, of the main LMDB database directory. +# max-indexing-memory = "2 GiB" # The maximum amount of memory the indexer will use. It defaults to 2/3 of the available # memory. It is recommended to use something like 80%-90% of the available memory, no # more. @@ -54,68 +59,73 @@ # is recommended to specify the amount of memory to use. # # /!\ The default value is system dependant /!\ -# max-indexing-memory = "2 GiB" +# max-indexing-threads = 4 # The maximum number of threads the indexer will use. If the number set is higher than the # real number of cores available in the machine, it will use the maximum number of # available cores. # # It defaults to half of the available threads. -# max-indexing-threads = 4 + +### + + +# max-task-db-size = "100 GiB" +# The maximum size, in bytes, of the update LMDB database directory. + +# http-payload-size-limit = 100000000 +# The maximum size, in bytes, of accepted JSON payloads. 
+ + +### SNAPSHOT + +# schedule-snapshot = false +# Activate snapshot scheduling. + +# snapshot-dir = "snapshots/" +# Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec. + +# snapshot-interval-sec = 86400 +# Defines time interval, in seconds, between each snapshot creation. + +# import-snapshot = false +# Defines the path of the snapshot file to import. This option will, by default, stop the +# process if a database already exists, or if no snapshot exists at the given path. If this +# option is not specified, no snapshot is imported. + +# ignore-missing-snapshot = false +# The engine will ignore a missing snapshot and not return an error in such a case. + +# ignore-snapshot-if-db-exists = false +# The engine will skip snapshot importation and not return an error in such a case. + +### ### SSL -# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE # ssl-auth-path = "./path/to/root" +# Enable client authentication, and accept certificates signed by those roots provided in CERTFILE. +# ssl-cert-path = "./path/to/CERTFILE" # Read server certificates from CERTFILE. This should contain PEM-format certificates in # the right order (the first certificate should certify KEYFILE, the last should be a root -# CA) -# ssl-cert-path = "./path/to/CERTFILE" +# CA). -# Read private key from KEYFILE. This should be a RSA private key or PKCS8-encoded -# private key, in PEM format # ssl-key-path = "./path/to/private-key" - -# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional +# Read the private key from KEYFILE. This should be an RSA private key or PKCS8-encoded +# private key, in PEM format. + # ssl-ocsp-path = "./path/to/OCSPFILE" +# Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional. 
-# Send a fatal alert if the client does not complete client authentication # ssl-require-auth = false - -# SSL support session resumption +# Send a fatal alert if the client does not complete client authentication. + # ssl-resumption = false - -# SSL support tickets +# SSL support session resumption. + # ssl-tickets = false +# SSL support tickets. - -### MISC - -# This environment variable must be set to `production` if you are running in production. -# If the server is running in development mode more logs will be displayed, and the master -# key can be avoided which implies that there is no security on the updates routes. This -# is useful to debug when integrating the engine with another service -# env = "development" # possible values: [development, production] - -# The address on which the http server will listen -# http-addr = "127.0.0.1:7700" - -# The maximum size, in bytes, of accepted JSON payloads -# http-payload-size-limit = 100000000 - -# The destination where the database must be created -# db-path = "./data.ms" - -# The engine will disable task auto-batching, and will sequencialy compute each task one by one -# disable-auto-batching = false - -# Set the log level -# log-level = "info" - -# The master key allowing you to do everything on the server -# master-key = "YOUR MASTER KEY" - -# The maximum size, in bytes, of the update lmdb database directory -# max-task-db-size = "100 GiB" +### diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 147f526a2..01cf39a2f 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,4 +1,5 @@ use std::env; +use std::path::PathBuf; use std::sync::Arc; use actix_web::http::KeepAlive; @@ -28,7 +29,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> { #[actix_web::main] async fn main() -> anyhow::Result<()> { - let opt = Opt::build(); + let (opt, config_read_from) = Opt::try_build()?; setup(&opt)?; @@ -57,7 +58,7 @@ async fn main() -> anyhow::Result<()> { 
#[cfg(any(debug_assertions, not(feature = "analytics")))] let (analytics, user) = analytics::MockAnalytics::new(&opt); - print_launch_resume(&opt, &user); + print_launch_resume(&opt, &user, config_read_from); run_http(meilisearch, auth_controller, opt, analytics).await?; @@ -96,7 +97,7 @@ async fn run_http( Ok(()) } -pub fn print_launch_resume(opt: &Opt, user: &str) { +pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option) { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { @@ -117,6 +118,12 @@ pub fn print_launch_resume(opt: &Opt, user: &str) { eprintln!("{}", ascii_name); + eprintln!( + "Config file path:\t{}", + config_read_from + .map(|config_file_path| config_file_path.display().to_string()) + .unwrap_or_else(|| "none".to_string()) + ); eprintln!("Database path:\t\t{:?}", opt.db_path); eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr); eprintln!("Environment:\t\t{:?}", opt.env); diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 11a396904..1f676813a 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -64,6 +64,7 @@ const DEFAULT_LOG_LEVEL: &str = "info"; #[derive(Debug, Clone, Parser, Serialize, Deserialize)] #[clap(version)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct Opt { /// The destination where the database must be created. #[clap(long, env = MEILI_DB_PATH, default_value_os_t = default_db_path())] @@ -75,15 +76,15 @@ pub struct Opt { #[serde(default = "default_http_addr")] pub http_addr: String, - /// The master key allowing you to do everything on the server. 
+ /// Sets the instance's master key, automatically protecting all routes except GET /health #[serde(skip_serializing)] #[clap(long, env = MEILI_MASTER_KEY)] pub master_key: Option, /// This environment variable must be set to `production` if you are running in production. - /// If the server is running in development mode more logs will be displayed, - /// and the master key can be avoided which implies that there is no security on the updates routes. - /// This is useful to debug when integrating the engine with another service. + /// More logs will be displayed if the server is running in development mode. Setting the master + /// key is optional; hence no security on the updates routes. This + /// is useful to debug when integrating the engine with another service #[clap(long, env = MEILI_ENV, default_value_t = default_env(), possible_values = &POSSIBLE_ENV)] #[serde(default = "default_env")] pub env: String, @@ -94,12 +95,12 @@ pub struct Opt { #[clap(long, env = MEILI_NO_ANALYTICS)] pub no_analytics: bool, - /// The maximum size, in bytes, of the main lmdb database directory + /// The maximum size, in bytes, of the main LMDB database directory #[clap(long, env = MEILI_MAX_INDEX_SIZE, default_value_t = default_max_index_size())] #[serde(default = "default_max_index_size")] pub max_index_size: Byte, - /// The maximum size, in bytes, of the update lmdb database directory + /// The maximum size, in bytes, of the update LMDB database directory #[clap(long, env = MEILI_MAX_TASK_DB_SIZE, default_value_t = default_max_task_db_size())] #[serde(default = "default_max_task_db_size")] pub max_task_db_size: Byte, @@ -117,7 +118,7 @@ pub struct Opt { #[clap(long, env = MEILI_SSL_CERT_PATH, parse(from_os_str))] pub ssl_cert_path: Option, - /// Read private key from KEYFILE. This should be a RSA + /// Read the private key from KEYFILE. This should be an RSA /// private key or PKCS8-encoded private key, in PEM format. 
#[serde(skip_serializing)] #[clap(long, env = MEILI_SSL_KEY_PATH, parse(from_os_str))] @@ -151,12 +152,12 @@ pub struct Opt { pub ssl_tickets: bool, /// Defines the path of the snapshot file to import. - /// This option will, by default, stop the process if a database already exist or if no snapshot exists at - /// the given path. If this option is not specified no snapshot is imported. + /// This option will, by default, stop the process if a database already exists or if no snapshot exists at + /// the given path. If this option is not specified, no snapshot is imported. #[clap(long, env = MEILI_IMPORT_SNAPSHOT)] pub import_snapshot: Option, - /// The engine will ignore a missing snapshot and not return an error in such case. + /// The engine will ignore a missing snapshot and not return an error in such a case. #[clap( long, env = MEILI_IGNORE_MISSING_SNAPSHOT, @@ -174,7 +175,7 @@ pub struct Opt { #[serde(default)] pub ignore_snapshot_if_db_exists: bool, - /// Defines the directory path where meilisearch will create snapshot each snapshot_time_gap. + /// Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec. #[clap(long, env = MEILI_SNAPSHOT_DIR, default_value_os_t = default_snapshot_dir())] #[serde(default = "default_snapshot_dir")] pub snapshot_dir: PathBuf, @@ -194,7 +195,7 @@ pub struct Opt { #[clap(long, env = MEILI_IMPORT_DUMP, conflicts_with = "import-snapshot")] pub import_dump: Option, - /// If the dump doesn't exists, load or create the database specified by `db-path` instead. + /// If the dump doesn't exist, load or create the database specified by `db-path` instead. #[clap(long, env = MEILI_IGNORE_MISSING_DUMP, requires = "import-dump")] #[serde(default)] pub ignore_missing_dump: bool, @@ -209,7 +210,7 @@ pub struct Opt { #[serde(default = "default_dumps_dir")] pub dumps_dir: PathBuf, - /// Set the log level + /// Set the log level. 
# Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] #[clap(long, env = MEILI_LOG_LEVEL, default_value_t = default_log_level())] #[serde(default = "default_log_level")] pub log_level: String, @@ -243,78 +244,124 @@ impl Opt { } /// Build a new Opt from config file, env vars and cli args. - pub fn build() -> Self { + pub fn try_build() -> anyhow::Result<(Self, Option)> { // Parse the args to get the config_file_path. let mut opts = Opt::parse(); - if let Some(config_file_path) = opts.config_file_path.as_ref() { - eprintln!("loading config file : {:?}", config_file_path); - match std::fs::read(config_file_path) { + let mut config_read_from = None; + if let Some(config_file_path) = opts + .config_file_path + .clone() + .or_else(|| Some(PathBuf::from("./config.toml"))) + { + match std::fs::read(&config_file_path) { Ok(config) => { - // If the arg is present, and the file successfully read, we deserialize it with `toml`. - let opt_from_config = - toml::from_slice::(&config).expect("can't read file"); - // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. - opt_from_config.export_to_env(); - // Once injected we parse the cli args once again to take the new env vars into scope. - opts = Opt::parse(); + // If the file is successfully read, we deserialize it with `toml`. + match toml::from_slice::(&config) { + Ok(opt_from_config) => { + // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. + opt_from_config.export_to_env(); + // Once injected we parse the cli args once again to take the new env vars into scope. + opts = Opt::parse(); + config_read_from = Some(config_file_path); + } + // If we have an error deserializing the file defined by the user. 
+ Err(err) if opts.config_file_path.is_some() => anyhow::bail!(err), + _ => (), + } } - Err(err) => eprintln!("can't read {:?} : {}", config_file_path, err), + // If we have an error while reading the file defined by the user. + Err(err) if opts.config_file_path.is_some() => anyhow::bail!(err), + _ => (), } } - opts + Ok((opts, config_read_from)) } /// Exports the opts values to their corresponding env vars if they are not set. fn export_to_env(self) { - export_to_env_if_not_present(MEILI_DB_PATH, self.db_path); - export_to_env_if_not_present(MEILI_HTTP_ADDR, self.http_addr); - if let Some(master_key) = self.master_key { + let Opt { + db_path, + http_addr, + master_key, + env, + max_index_size, + max_task_db_size, + http_payload_size_limit, + ssl_cert_path, + ssl_key_path, + ssl_auth_path, + ssl_ocsp_path, + ssl_require_auth, + ssl_resumption, + ssl_tickets, + snapshot_dir, + schedule_snapshot, + snapshot_interval_sec, + dumps_dir, + log_level, + indexer_options, + scheduler_options, + import_snapshot: _, + ignore_missing_snapshot: _, + ignore_snapshot_if_db_exists: _, + import_dump: _, + ignore_missing_dump: _, + ignore_dump_if_db_exists: _, + config_file_path: _, + #[cfg(all(not(debug_assertions), feature = "analytics"))] + no_analytics, + #[cfg(feature = "metrics")] + enable_metrics_route, + } = self; + export_to_env_if_not_present(MEILI_DB_PATH, db_path); + export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr); + if let Some(master_key) = master_key { export_to_env_if_not_present(MEILI_MASTER_KEY, master_key); } - export_to_env_if_not_present(MEILI_ENV, self.env); + export_to_env_if_not_present(MEILI_ENV, env); #[cfg(all(not(debug_assertions), feature = "analytics"))] { - export_to_env_if_not_present(MEILI_NO_ANALYTICS, self.no_analytics.to_string()); + export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string()); } - export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, self.max_index_size.to_string()); - 
export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, self.max_task_db_size.to_string()); + export_to_env_if_not_present(MEILI_MAX_INDEX_SIZE, max_index_size.to_string()); + export_to_env_if_not_present(MEILI_MAX_TASK_DB_SIZE, max_task_db_size.to_string()); export_to_env_if_not_present( MEILI_HTTP_PAYLOAD_SIZE_LIMIT, - self.http_payload_size_limit.to_string(), + http_payload_size_limit.to_string(), ); - if let Some(ssl_cert_path) = self.ssl_cert_path { + if let Some(ssl_cert_path) = ssl_cert_path { export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path); } - if let Some(ssl_key_path) = self.ssl_key_path { + if let Some(ssl_key_path) = ssl_key_path { export_to_env_if_not_present(MEILI_SSL_KEY_PATH, ssl_key_path); } - if let Some(ssl_auth_path) = self.ssl_auth_path { + if let Some(ssl_auth_path) = ssl_auth_path { export_to_env_if_not_present(MEILI_SSL_AUTH_PATH, ssl_auth_path); } - if let Some(ssl_ocsp_path) = self.ssl_ocsp_path { + if let Some(ssl_ocsp_path) = ssl_ocsp_path { export_to_env_if_not_present(MEILI_SSL_OCSP_PATH, ssl_ocsp_path); } - export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, self.ssl_require_auth.to_string()); - export_to_env_if_not_present(MEILI_SSL_RESUMPTION, self.ssl_resumption.to_string()); - export_to_env_if_not_present(MEILI_SSL_TICKETS, self.ssl_tickets.to_string()); - export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, self.snapshot_dir); - export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, self.schedule_snapshot.to_string()); + export_to_env_if_not_present(MEILI_SSL_REQUIRE_AUTH, ssl_require_auth.to_string()); + export_to_env_if_not_present(MEILI_SSL_RESUMPTION, ssl_resumption.to_string()); + export_to_env_if_not_present(MEILI_SSL_TICKETS, ssl_tickets.to_string()); + export_to_env_if_not_present(MEILI_SNAPSHOT_DIR, snapshot_dir); + export_to_env_if_not_present(MEILI_SCHEDULE_SNAPSHOT, schedule_snapshot.to_string()); export_to_env_if_not_present( MEILI_SNAPSHOT_INTERVAL_SEC, - self.snapshot_interval_sec.to_string(), + 
snapshot_interval_sec.to_string(), ); - export_to_env_if_not_present(MEILI_DUMPS_DIR, self.dumps_dir); - export_to_env_if_not_present(MEILI_LOG_LEVEL, self.log_level); + export_to_env_if_not_present(MEILI_DUMPS_DIR, dumps_dir); + export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level); #[cfg(feature = "metrics")] { export_to_env_if_not_present( MEILI_ENABLE_METRICS_ROUTE, - self.enable_metrics_route.to_string(), + enable_metrics_route.to_string(), ); } - self.indexer_options.export_to_env(); - self.scheduler_options.export_to_env(); + indexer_options.export_to_env(); + scheduler_options.export_to_env(); } pub fn get_ssl_config(&self) -> anyhow::Result> { diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index 5aa7edf37..d75e02b39 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -12,10 +12,10 @@ use sysinfo::{RefreshKind, System, SystemExt}; const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY"; const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS"; const DISABLE_AUTO_BATCHING: &str = "DISABLE_AUTO_BATCHING"; - const DEFAULT_LOG_EVERY_N: usize = 100000; #[derive(Debug, Clone, Parser, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct IndexerOpts { /// The amount of documents to skip before printing /// a log regarding the indexing advancement. @@ -50,6 +50,7 @@ pub struct IndexerOpts { } #[derive(Debug, Clone, Parser, Default, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", deny_unknown_fields)] pub struct SchedulerConfig { /// The engine will disable task auto-batching, /// and will sequencialy compute each task one by one. @@ -61,7 +62,13 @@ pub struct SchedulerConfig { impl IndexerOpts { /// Exports the values to their corresponding env vars if they are not set. 
pub fn export_to_env(self) { - if let Some(max_indexing_memory) = self.max_indexing_memory.0 { + let IndexerOpts { + max_indexing_memory, + max_indexing_threads, + log_every_n: _, + max_nb_chunks: _, + } = self; + if let Some(max_indexing_memory) = max_indexing_memory.0 { export_to_env_if_not_present( MEILI_MAX_INDEXING_MEMORY, max_indexing_memory.to_string(), @@ -69,7 +76,7 @@ impl IndexerOpts { } export_to_env_if_not_present( MEILI_MAX_INDEXING_THREADS, - self.max_indexing_threads.0.to_string(), + max_indexing_threads.0.to_string(), ); } } @@ -106,10 +113,10 @@ impl Default for IndexerOpts { impl SchedulerConfig { pub fn export_to_env(self) { - export_to_env_if_not_present( - DISABLE_AUTO_BATCHING, - self.disable_auto_batching.to_string(), - ); + let SchedulerConfig { + disable_auto_batching, + } = self; + export_to_env_if_not_present(DISABLE_AUTO_BATCHING, disable_auto_batching.to_string()); } } From d406fe901b365a8b035bfb27fa2a5331d1d0bd71 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Wed, 21 Sep 2022 10:55:16 +0200 Subject: [PATCH 29/44] Pass config.toml keys to snake_case --- config.toml | 54 ++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/config.toml b/config.toml index 5d5ae4507..6d7f1af14 100644 --- a/config.toml +++ b/config.toml @@ -13,43 +13,43 @@ # http_addr = "127.0.0.1:7700" # The address on which the HTTP server will listen. -# master-key = "MASTER_KEY" +# master_key = "MASTER_KEY" # Sets the instance's master key, automatically protecting all routes except GET /health. # no_analytics = false # Do not send analytics to Meilisearch. -# disable-auto-batching = false +# disable_auto_batching = false # The engine will disable task auto-batching, and will sequencialy compute each task one by one. ### DUMP -# dumps-dir = "dumps/" +# dumps_dir = "dumps/" # Folder where dumps are created when the dump route is called. 
-# import-dump = "./path/to/my/file.dump" +# import_dump = "./path/to/my/file.dump" # Import a dump from the specified path, must be a `.dump` file. -# ignore-missing-dump = false -# If the dump doesn't exist, load or create the database specified by `db-path` instead. +# ignore_missing_dump = false +# If the dump doesn't exist, load or create the database specified by `db_path` instead. -# ignore-dump-if-db-exists = false +# ignore_dump_if_db_exists = false # Ignore the dump if a database already exists, and load that database instead. ### -# log-level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] +# log_level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] # Set the log level. ### INDEX -# max-index-size = "100 GiB" +# max_index_size = "100 GiB" # The maximum size, in bytes, of the main LMDB database directory. -# max-indexing-memory = "2 GiB" +# max_indexing_memory = "2 GiB" # The maximum amount of memory the indexer will use. It defaults to 2/3 of the available # memory. It is recommended to use something like 80%-90% of the available memory, no # more. @@ -60,7 +60,7 @@ # # /!\ The default value is system dependant /!\ -# max-indexing-threads = 4 +# max_indexing_threads = 4 # The maximum number of threads the indexer will use. If the number set is higher than the # real number of cores available in the machine, it will use the maximum number of # available cores. @@ -70,33 +70,33 @@ ### -# max-task-db-size = "100 GiB" +# max_task_db_size = "100 GiB" # The maximum size, in bytes, of the update LMDB database directory. -# http-payload-size-limit = 100000000 +# http_payload_size_limit = 100000000 # The maximum size, in bytes, of accepted JSON payloads. ### SNAPSHOT -# schedule-snapshot = false +# schedule_snapshot = false # Activate snapshot scheduling. -# snapshot-dir = "snapshots/" -# Defines the directory path where Meilisearch will create a snapshot each snapshot-interval-sec. 
+# snapshot_dir = "snapshots/" +# Defines the directory path where Meilisearch will create a snapshot each snapshot_interval_sec. -# snapshot-interval-sec = 86400 +# snapshot_interval_sec = 86400 # Defines time interval, in seconds, between each snapshot creation. -# import-snapshot = false +# import_snapshot = false # Defines the path of the snapshot file to import. This option will, by default, stop the # process if a database already exist, or if no snapshot exists at the given path. If this # option is not specified, no snapshot is imported. -# ignore-missing-snapshot = false +# ignore_missing_snapshot = false # The engine will ignore a missing snapshot and not return an error in such a case. -# ignore-snapshot-if-db-exists = false +# ignore_snapshot_if_db_exists = false # The engine will skip snapshot importation and not return an error in such a case. ### @@ -104,28 +104,28 @@ ### SSL -# ssl-auth-path = "./path/to/root" +# ssl_auth_path = "./path/to/root" # Enable client authentication, and accept certificates signed by those roots provided in CERTFILE. -# ssl-cert-path = "./path/to/CERTFILE" +# ssl_cert_path = "./path/to/CERTFILE" # Read server certificates from CERTFILE. This should contain PEM-format certificates in # the right order (the first certificate should certify KEYFILE, the last should be a root # CA). -# ssl-key-path = "./path/to/private-key" +# ssl_key_path = "./path/to/private-key" # Read the private key from KEYFILE. This should be an RSA private key or PKCS8-encoded # private key, in PEM format. -# ssl-ocsp-path = "./path/to/OCSPFILE" +# ssl_ocsp_path = "./path/to/OCSPFILE" # Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional. -# ssl-require-auth = false +# ssl_require_auth = false # Send a fatal alert if the client does not complete client authentication. -# ssl-resumption = false +# ssl_resumption = false # SSL support session resumption. -# ssl-tickets = false +# ssl_tickets = false # SSL support tickets. 
### From d3b984d86210f31d81e73b9be54cc9d038a0332b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar?= Date: Wed, 21 Sep 2022 11:03:01 +0200 Subject: [PATCH 30/44] Update CI to send a signal to Cloud team when Docker image is pushed Co-authored-by: Samuel Jimenez --- .github/workflows/publish-docker-images.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml index 88605bee1..449aec020 100644 --- a/.github/workflows/publish-docker-images.yml +++ b/.github/workflows/publish-docker-images.yml @@ -62,10 +62,19 @@ jobs: type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' }} - name: Build and push - id: docker_build uses: docker/build-push-action@v3 with: # We do not push tags for the cron jobs, this is only for test purposes push: ${{ github.event_name != 'schedule' }} platforms: linux/amd64,linux/arm64 tags: ${{ steps.meta.outputs.tags }} + + # /!\ Don't touch this without checking with Cloud team + - name: Send CI information to Cloud team + if: github.event_name != 'schedule' + uses: peter-evans/repository-dispatch@v2 + with: + token: ${{ secrets.MEILI_BOT_GH_PAT }} + repository: meilisearch/meilisearch-cloud + event-type: cloud-docker-build + client-payload: '{ "meilisearch_version": "${{ steps.meta.outputs.tags }}", "stable": "${{ steps.check-tag-format.outputs.stable }}" }' From 740926e7474a603f77c770488265d331032ad3b7 Mon Sep 17 00:00:00 2001 From: Kian-Meng Ang Date: Tue, 20 Sep 2022 22:39:35 +0800 Subject: [PATCH 31/44] Fix typos Found via `codespell -L crate,nam,hart,succeded`. 
--- .github/scripts/is-latest-release.sh | 2 +- .github/workflows/milestone-workflow.yml | 12 ++++++------ .github/workflows/publish-docker-images.yml | 2 +- CONTRIBUTING.md | 2 +- meilisearch-http/src/analytics/segment_analytics.rs | 10 +++++----- meilisearch-http/src/option.rs | 2 +- meilisearch-lib/src/dump/compat/v2.rs | 2 +- meilisearch-lib/src/dump/loaders/v4.rs | 4 ++-- meilisearch-lib/src/index/search.rs | 2 +- meilisearch-lib/src/index_resolver/index_store.rs | 2 +- meilisearch-lib/src/tasks/task_store/store.rs | 2 +- 11 files changed, 21 insertions(+), 21 deletions(-) diff --git a/.github/scripts/is-latest-release.sh b/.github/scripts/is-latest-release.sh index 81534a2f7..54f0a9d3a 100644 --- a/.github/scripts/is-latest-release.sh +++ b/.github/scripts/is-latest-release.sh @@ -85,7 +85,7 @@ get_latest() { latest="" current_tag="" for release_info in $releases; do - if [ $i -eq 0 ]; then # Cheking tag_name + if [ $i -eq 0 ]; then # Checking tag_name if echo "$release_info" | grep -q "$GREP_SEMVER_REGEXP"; then # If it's not an alpha or beta release current_tag=$release_info else diff --git a/.github/workflows/milestone-workflow.yml b/.github/workflows/milestone-workflow.yml index 4d0425f14..4cb87684d 100644 --- a/.github/workflows/milestone-workflow.yml +++ b/.github/workflows/milestone-workflow.yml @@ -62,12 +62,12 @@ jobs: - uses: actions/checkout@v3 - name: Download the issue template run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/roadmap-issue.md > $ISSUE_TEMPLATE - - name: Replace all empty occurences in the templates + - name: Replace all empty occurrences in the templates run: | - # Replace all <> occurences + # Replace all <> occurrences sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE - # Replace all <> occurences + # Replace all <> occurrences milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7) sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE @@ -95,12 +95,12 @@ jobs: - uses: actions/checkout@v3 - name: 
Download the issue template run: curl -s https://raw.githubusercontent.com/meilisearch/core-team/main/issue-templates/changelog-issue.md > $ISSUE_TEMPLATE - - name: Replace all empty occurences in the templates + - name: Replace all empty occurrences in the templates run: | - # Replace all <> occurences + # Replace all <> occurrences sed -i "s/<>/$MILESTONE_VERSION/g" $ISSUE_TEMPLATE - # Replace all <> occurences + # Replace all <> occurrences milestone_id=$(echo $MILESTONE_URL | cut -d '/' -f 7) sed -i "s/<>/$milestone_id/g" $ISSUE_TEMPLATE - name: Create the issue diff --git a/.github/workflows/publish-docker-images.yml b/.github/workflows/publish-docker-images.yml index 449aec020..f2e119a6d 100644 --- a/.github/workflows/publish-docker-images.yml +++ b/.github/workflows/publish-docker-images.yml @@ -53,7 +53,7 @@ jobs: uses: docker/metadata-action@v4 with: images: getmeili/meilisearch - # The lastest and `vX.Y` tags are only pushed for the official Meilisearch releases + # The latest and `vX.Y` tags are only pushed for the official Meilisearch releases # See https://github.com/docker/metadata-action#latest-tag flavor: latest=false tags: | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1c40c7dac..bf433eb09 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -102,7 +102,7 @@ The full Meilisearch release process is described in [this guide](https://github ### Release assets For each release, the following assets are created: -- Binaries for differents platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release +- Binaries for different platforms (Linux, MacOS, Windows and ARM architectures) are attached to the GitHub release - Binaries are pushed to HomeBrew and APT (not published for RC) - Docker tags are created/updated: - `vX.Y.Z` diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index f0dfd0fab..7b76cdd80 100644 --- 
a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -349,16 +349,16 @@ pub struct SearchAggregator { // sort sort_with_geo_point: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains + // every time a request has a filter, this field must be incremented by the number of terms it contains sort_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one + // every time a request has a filter, this field must be incremented by one sort_total_number_of_criteria: usize, // filter filter_with_geo_radius: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains + // every time a request has a filter, this field must be incremented by the number of terms it contains filter_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one + // every time a request has a filter, this field must be incremented by one filter_total_number_of_criteria: usize, used_syntax: HashMap, @@ -366,7 +366,7 @@ pub struct SearchAggregator { // The maximum number of terms in a q request max_terms_number: usize, - // everytime a search is done, we increment the counter linked to the used settings + // every time a search is done, we increment the counter linked to the used settings matching_strategy: HashMap, // pagination diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index bdfa283a6..31942aeec 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -169,7 +169,7 @@ pub struct Opt { } impl Opt { - /// Wether analytics should be enabled or not. + /// Whether analytics should be enabled or not. 
#[cfg(all(not(debug_assertions), feature = "analytics"))] pub fn analytics(&self) -> bool { !self.no_analytics diff --git a/meilisearch-lib/src/dump/compat/v2.rs b/meilisearch-lib/src/dump/compat/v2.rs index 364d894c4..ba3b8e3a6 100644 --- a/meilisearch-lib/src/dump/compat/v2.rs +++ b/meilisearch-lib/src/dump/compat/v2.rs @@ -145,7 +145,7 @@ pub fn error_code_from_str(s: &str) -> anyhow::Result { "unsupported_media_type" => Code::UnsupportedMediaType, "dump_already_in_progress" => Code::DumpAlreadyInProgress, "dump_process_failed" => Code::DumpProcessFailed, - _ => bail!("unknow error code."), + _ => bail!("unknown error code."), }; Ok(code) diff --git a/meilisearch-lib/src/dump/loaders/v4.rs b/meilisearch-lib/src/dump/loaders/v4.rs index 0744df7ea..44ec23517 100644 --- a/meilisearch-lib/src/dump/loaders/v4.rs +++ b/meilisearch-lib/src/dump/loaders/v4.rs @@ -57,10 +57,10 @@ fn patch_updates(src: impl AsRef, dst: impl AsRef) -> anyhow::Result let updates_path = src.as_ref().join("updates/data.jsonl"); let output_updates_path = dst.as_ref().join("updates/data.jsonl"); create_dir_all(output_updates_path.parent().unwrap())?; - let udpates_file = File::open(updates_path)?; + let updates_file = File::open(updates_path)?; let mut output_update_file = File::create(output_updates_path)?; - serde_json::Deserializer::from_reader(udpates_file) + serde_json::Deserializer::from_reader(updates_file) .into_iter::() .try_for_each(|task| -> anyhow::Result<()> { let task: Task = task?.into(); diff --git a/meilisearch-lib/src/index/search.rs b/meilisearch-lib/src/index/search.rs index 57171d529..1a9aa1d0d 100644 --- a/meilisearch-lib/src/index/search.rs +++ b/meilisearch-lib/src/index/search.rs @@ -27,7 +27,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "".to_string(); -/// The maximimum number of results that the engine +/// The 
maximum number of results that the engine /// will be able to return in one search call. pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000; diff --git a/meilisearch-lib/src/index_resolver/index_store.rs b/meilisearch-lib/src/index_resolver/index_store.rs index e4f58f130..ea3c7125a 100644 --- a/meilisearch-lib/src/index_resolver/index_store.rs +++ b/meilisearch-lib/src/index_resolver/index_store.rs @@ -51,7 +51,7 @@ impl MapIndexStore { #[async_trait::async_trait] impl IndexStore for MapIndexStore { async fn create(&self, uuid: Uuid) -> Result { - // We need to keep the lock until we are sure the db file has been opened correclty, to + // We need to keep the lock until we are sure the db file has been opened correctly, to // ensure that another db is not created at the same time. let mut lock = self.index_store.write().await; diff --git a/meilisearch-lib/src/tasks/task_store/store.rs b/meilisearch-lib/src/tasks/task_store/store.rs index 24d0d3a65..32b20aeb8 100644 --- a/meilisearch-lib/src/tasks/task_store/store.rs +++ b/meilisearch-lib/src/tasks/task_store/store.rs @@ -63,7 +63,7 @@ impl Store { /// Returns the id for the next task. /// /// The required `mut txn` acts as a reservation system. It guarantees that as long as you commit - /// the task to the store in the same transaction, no one else will hav this task id. + /// the task to the store in the same transaction, no one else will have this task id. 
pub fn next_task_id(&self, txn: &mut RwTxn) -> Result { let id = self .tasks From 56d72d449337ee6c39b51d753d0d3127d937d299 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Wed, 21 Sep 2022 16:31:16 +0200 Subject: [PATCH 32/44] Uncomment static default values and fix typo --- config.toml | 46 ++++++++++++++++------------------ meilisearch-http/src/option.rs | 6 ++--- meilisearch-lib/src/options.rs | 4 +-- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/config.toml b/config.toml index 6d7f1af14..8da71c70a 100644 --- a/config.toml +++ b/config.toml @@ -1,16 +1,16 @@ # This file shows the default configuration of Meilisearch. # All variables are defined here https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables -# db_path = "./data.ms" +db_path = "./data.ms" # The destination where the database must be created. -# env = "development" # Possible values: [development, production] +env = "development" # Possible values: [development, production] # This environment variable must be set to `production` if you are running in production. # More logs wiil be displayed if the server is running in development mode. Setting the master # key is optional; hence no security on the updates routes. This # is useful to debug when integrating the engine with another service. -# http_addr = "127.0.0.1:7700" +http_addr = "127.0.0.1:7700" # The address on which the HTTP server will listen. # master_key = "MASTER_KEY" @@ -19,40 +19,38 @@ # no_analytics = false # Do not send analytics to Meilisearch. -# disable_auto_batching = false +disable_auto_batching = false # The engine will disable task auto-batching, and will sequencialy compute each task one by one. ### DUMP -# dumps_dir = "dumps/" +dumps_dir = "dumps/" # Folder where dumps are created when the dump route is called. # import_dump = "./path/to/my/file.dump" # Import a dump from the specified path, must be a `.dump` file. 
-# ignore_missing_dump = false +ignore_missing_dump = false # If the dump doesn't exist, load or create the database specified by `db_path` instead. -# ignore_dump_if_db_exists = false +ignore_dump_if_db_exists = false # Ignore the dump if a database already exists, and load that database instead. ### -# log_level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] +log_level = "INFO" # Possible values: [ERROR, WARN, INFO, DEBUG, TRACE] # Set the log level. ### INDEX -# max_index_size = "100 GiB" +max_index_size = "100 GiB" # The maximum size, in bytes, of the main LMDB database directory. # max_indexing_memory = "2 GiB" -# The maximum amount of memory the indexer will use. It defaults to 2/3 of the available -# memory. It is recommended to use something like 80%-90% of the available memory, no -# more. +# The maximum amount of memory the indexer will use. # # In case the engine is unable to retrieve the available memory the engine will try to use # the memory it needs but without real limit, this can lead to Out-Of-Memory issues and it @@ -70,33 +68,33 @@ ### -# max_task_db_size = "100 GiB" +max_task_db_size = "100 GiB" # The maximum size, in bytes, of the update LMDB database directory. -# http_payload_size_limit = 100000000 +http_payload_size_limit = "100 MB" # The maximum size, in bytes, of accepted JSON payloads. ### SNAPSHOT -# schedule_snapshot = false +schedule_snapshot = false # Activate snapshot scheduling. -# snapshot_dir = "snapshots/" +snapshot_dir = "snapshots/" # Defines the directory path where Meilisearch will create a snapshot each snapshot_interval_sec. -# snapshot_interval_sec = 86400 +snapshot_interval_sec = 86400 # Defines time interval, in seconds, between each snapshot creation. -# import_snapshot = false +# import_snapshot = "./path/to/my/snapshot" # Defines the path of the snapshot file to import. This option will, by default, stop the -# process if a database already exist, or if no snapshot exists at the given path. 
If this +# process if a database already exists, or if no snapshot exists at the given path. If this # option is not specified, no snapshot is imported. -# ignore_missing_snapshot = false +ignore_missing_snapshot = false # The engine will ignore a missing snapshot and not return an error in such a case. -# ignore_snapshot_if_db_exists = false +ignore_snapshot_if_db_exists = false # The engine will skip snapshot importation and not return an error in such a case. ### @@ -119,13 +117,13 @@ # ssl_ocsp_path = "./path/to/OCSPFILE" # Read DER-encoded OCSP response from OCSPFILE and staple to certificate. Optional. -# ssl_require_auth = false +ssl_require_auth = false # Send a fatal alert if the client does not complete client authentication. -# ssl_resumption = false +ssl_resumption = false # SSL support session resumption. -# ssl_tickets = false +ssl_tickets = false # SSL support tickets. ### diff --git a/meilisearch-http/src/option.rs b/meilisearch-http/src/option.rs index 1f676813a..ff8c0d120 100644 --- a/meilisearch-http/src/option.rs +++ b/meilisearch-http/src/option.rs @@ -60,7 +60,7 @@ const DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT: &str = "100 MB"; const DEFAULT_SNAPSHOT_DIR: &str = "snapshots/"; const DEFAULT_SNAPSHOT_INTERVAL_SEC: u64 = 86400; const DEFAULT_DUMPS_DIR: &str = "dumps/"; -const DEFAULT_LOG_LEVEL: &str = "info"; +const DEFAULT_LOG_LEVEL: &str = "INFO"; #[derive(Debug, Clone, Parser, Serialize, Deserialize)] #[clap(version)] @@ -126,8 +126,8 @@ pub struct Opt { /// Enable client authentication, and accept certificates /// signed by those roots provided in CERTFILE. - #[clap(long, env = MEILI_SSL_AUTH_PATH, parse(from_os_str))] #[serde(skip_serializing)] + #[clap(long, env = MEILI_SSL_AUTH_PATH, parse(from_os_str))] pub ssl_auth_path: Option, /// Read DER-encoded OCSP response from OCSPFILE and staple to certificate. @@ -152,7 +152,7 @@ pub struct Opt { pub ssl_tickets: bool, /// Defines the path of the snapshot file to import. 
- /// This option will, by default, stop the process if a database already exists or if no snapshot exists at + /// This option will, by default, stop the process if a database already exists, or if no snapshot exists at /// the given path. If this option is not specified, no snapshot is imported. #[clap(long, env = MEILI_IMPORT_SNAPSHOT)] pub import_snapshot: Option, diff --git a/meilisearch-lib/src/options.rs b/meilisearch-lib/src/options.rs index d75e02b39..bd406fbdd 100644 --- a/meilisearch-lib/src/options.rs +++ b/meilisearch-lib/src/options.rs @@ -28,9 +28,7 @@ pub struct IndexerOpts { #[clap(long, hide = true)] pub max_nb_chunks: Option, - /// The maximum amount of memory the indexer will use. It defaults to 2/3 - /// of the available memory. It is recommended to use something like 80%-90% - /// of the available memory, no more. + /// The maximum amount of memory the indexer will use. /// /// In case the engine is unable to retrieve the available memory the engine will /// try to use the memory it needs but without real limit, this can lead to From 248d727e6137c42e73b4f0a1d15ae0d7e90404b8 Mon Sep 17 00:00:00 2001 From: mlemesle Date: Thu, 22 Sep 2022 09:44:28 +0200 Subject: [PATCH 33/44] Add quotes around file name and change error message --- meilisearch-http/src/main.rs | 2 +- meilisearch-http/src/option.rs | 23 ++++++++++------------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 01cf39a2f..b6f92ae28 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -119,7 +119,7 @@ pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option { // If the file is successfully read, we deserialize it with `toml`. - match toml::from_slice::(&config) { - Ok(opt_from_config) => { - // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. 
- opt_from_config.export_to_env(); - // Once injected we parse the cli args once again to take the new env vars into scope. - opts = Opt::parse(); - config_read_from = Some(config_file_path); - } - // If we have an error deserializing the file defined by the user. - Err(err) if opts.config_file_path.is_some() => anyhow::bail!(err), - _ => (), - } + let opt_from_config = toml::from_slice::(&config)?; + // We inject the values from the toml in the corresponding env vars if needs be. Doing so, we respect the priority toml < env vars < cli args. + opt_from_config.export_to_env(); + // Once injected we parse the cli args once again to take the new env vars into scope. + opts = Opt::parse(); + config_read_from = Some(config_file_path); } // If we have an error while reading the file defined by the user. - Err(err) if opts.config_file_path.is_some() => anyhow::bail!(err), + Err(_) if opts.config_file_path.is_some() => anyhow::bail!( + "unable to open or read the {:?} configuration file.", + opts.config_file_path.unwrap().display().to_string() + ), _ => (), } } From d166a97d6784374e45db0e3e8043fe5fbe0b6596 Mon Sep 17 00:00:00 2001 From: Luna-meili <112891105+Luna-meili@users.noreply.github.com> Date: Thu, 22 Sep 2022 17:27:42 +0200 Subject: [PATCH 34/44] Update CONTRIBUTING.md for Hacktoberfest (#2793) * Update CONTRIBUTING.md * Update CONTRIBUTING.md Co-authored-by: Bruno Casali * Update CONTRIBUTING.md Co-authored-by: Bruno Casali Co-authored-by: Bruno Casali --- CONTRIBUTING.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1c40c7dac..93d5a2136 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -10,12 +10,22 @@ If Meilisearch does not offer optimized support for your language, please consid ## Table of Contents +- [Hacktoberfest 2022](#hacktoberfest-2022) - [Assumptions](#assumptions) - [How to Contribute](#how-to-contribute) - [Development Workflow](#development-workflow) - [Git Guidelines](#git-guidelines) - 
[Release Process (for internal team only)](#release-process-for-internal-team-only) +## Hacktoberfest 2022 + +It's [Hacktoberfest month](https://hacktoberfest.com)! 🥳 + +Thanks so much for participating with Meilisearch this year! +1. We will follow the quality standards set by the organizers of Hacktoberfest (see detail on their [website](https://hacktoberfest.digitalocean.com/resources/qualitystandards)). Our reviewers will not consider any PR that doesn’t match that standard. +2. PRs reviews will take place from Monday to Thursday, during usual working hours, CEST time. If you submit outside of these hours, there’s no need to panic; we will get around to your contribution. +3. There will be no issue assignment as we don’t want people to ask to be assigned specific issues and never return, discouraging the volunteer contributors from opening a PR to fix this issue. We take the liberty to choose the PR that best fixes the issue, so we encourage you to get to it as soon as possible and do your best! + ## Assumptions 1. 
**You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.** From 2827ff7957675c3deb261d582168d1ca4cf6fe11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9mentine=20Urquizar=20-=20curqui?= Date: Thu, 22 Sep 2022 17:45:33 +0200 Subject: [PATCH 35/44] Update README.md with Hacktoberfest section (#2794) * Update README.md * Update README.md * Update README.md Co-authored-by: Bruno Casali Co-authored-by: Bruno Casali --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index f728d8a6b..2bbc3dfe1 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,14 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f 🔥 [**Try it!**](https://where2watch.meilisearch.com/) 🔥 +## 🎃 Hacktoberfest + +It’s Hacktoberfest 2022 @Meilisearch + +[Hacktoberfest](https://hacktoberfest.com/) is a celebration of the open-source community. This year, and for the third time in a row, Meilisearch is participating in this fantastic event. + +You’d like to contribute? Don’t hesitate to check out our [contributing guidelines](./CONTRIBUTING.md). + ## ✨ Features - **Search-as-you-type:** find search results in less than 50 milliseconds From 05f93541d833c6b90bb95a773da338e3838371b3 Mon Sep 17 00:00:00 2001 From: Jakub Jirutka Date: Thu, 29 Sep 2022 01:42:10 +0200 Subject: [PATCH 36/44] Skip dashboard test if mini-dashboard feature is disabled Fixes the following error: cargo test --no-default-features ... 
error: couldn't read target/debug/build/meilisearch-http-ec029d8c902cf2cb/out/generated.rs: No such file or directory (os error 2) --> meilisearch-http/tests/dashboard/mod.rs:8:9 | 8 | include!(concat!(env!("OUT_DIR"), "/generated.rs")); | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | = note: this error originates in the macro `include` (in Nightly builds, run with -Z macro-backtrace for more info) error: could not compile `meilisearch-http` due to previous error --- meilisearch-http/tests/dashboard/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/meilisearch-http/tests/dashboard/mod.rs b/meilisearch-http/tests/dashboard/mod.rs index d097cfd4b..2699cd16f 100644 --- a/meilisearch-http/tests/dashboard/mod.rs +++ b/meilisearch-http/tests/dashboard/mod.rs @@ -1,5 +1,6 @@ use crate::common::Server; +#[cfg(feature = "mini-dashboard")] #[actix_rt::test] async fn dashboard_assets_load() { let server = Server::new().await; From 7905dae7ad591e60f90c0b803be54f852338020e Mon Sep 17 00:00:00 2001 From: meili-bot <74670311+meili-bot@users.noreply.github.com> Date: Thu, 29 Sep 2022 16:00:08 +0200 Subject: [PATCH 37/44] Update CONTRIBUTING.md --- CONTRIBUTING.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 93d5a2136..ca14750da 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -26,6 +26,8 @@ Thanks so much for participating with Meilisearch this year! 2. PRs reviews will take place from Monday to Thursday, during usual working hours, CEST time. If you submit outside of these hours, there’s no need to panic; we will get around to your contribution. 3. There will be no issue assignment as we don’t want people to ask to be assigned specific issues and never return, discouraging the volunteer contributors from opening a PR to fix this issue. We take the liberty to choose the PR that best fixes the issue, so we encourage you to get to it as soon as possible and do your best! 
+You can check out the longer, more complete guideline documentation [here](https://github.com/meilisearch/.github/blob/main/Hacktoberfest_2022_contributors_guidelines.md). + ## Assumptions 1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests)(PR) workflow.** From 61a518a3846a9916b1fa10b213ecf1aaa469636f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Wi=C5=9Bniewski?= Date: Thu, 29 Sep 2022 16:36:32 +0200 Subject: [PATCH 38/44] Fix #2680 - replace a meaningless serde message --- .../tests/documents/add_documents.rs | 4 ++-- meilisearch-lib/src/document_formats.rs | 17 ++++++++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index 685428784..8e6ba44a9 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -372,7 +372,7 @@ async fn error_add_malformed_json_documents() { assert_eq!( response["message"], json!( - r#"The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`."# + r#"The `json` payload provided is malformed. `Couldn't serialize document value: data are neither an object nor a list of objects`."# ) ); assert_eq!(response["code"], json!("malformed_payload")); @@ -395,7 +395,7 @@ async fn error_add_malformed_json_documents() { assert_eq!(status_code, 400); assert_eq!( response["message"], - json!("The `json` payload provided is malformed. `Couldn't serialize document value: data did not match any variant of untagged enum Either`.") + json!("The `json` payload provided is malformed. 
`Couldn't serialize document value: data are neither an object nor a list of objects`.") ); assert_eq!(response["code"], json!("malformed_payload")); assert_eq!(response["type"], json!("invalid_request")); diff --git a/meilisearch-lib/src/document_formats.rs b/meilisearch-lib/src/document_formats.rs index 83e5b9fdb..cfc200019 100644 --- a/meilisearch-lib/src/document_formats.rs +++ b/meilisearch-lib/src/document_formats.rs @@ -8,6 +8,7 @@ use meilisearch_types::internal_error; use milli::documents::{DocumentsBatchBuilder, Error}; use milli::Object; use serde::Deserialize; +use serde_json::error::Category; type Result = std::result::Result; @@ -40,18 +41,24 @@ impl Display for DocumentFormatError { Self::Internal(e) => write!(f, "An internal error has occurred: `{}`.", e), Self::MalformedPayload(me, b) => match me.borrow() { Error::Json(se) => { + let mut message = match se.classify() { + Category::Data => { + "data are neither an object nor a list of objects".to_string() + } + _ => se.to_string(), + }; + // https://github.com/meilisearch/meilisearch/issues/2107 // The user input maybe insanely long. We need to truncate it. - let mut serde_msg = se.to_string(); let ellipsis = "..."; let trim_input_prefix_len = 50; let trim_input_suffix_len = 85; - if serde_msg.len() + if message.len() > trim_input_prefix_len + trim_input_suffix_len + ellipsis.len() { - serde_msg.replace_range( - trim_input_prefix_len..serde_msg.len() - trim_input_suffix_len, + message.replace_range( + trim_input_prefix_len..message.len() - trim_input_suffix_len, ellipsis, ); } @@ -59,7 +66,7 @@ impl Display for DocumentFormatError { write!( f, "The `{}` payload provided is malformed. 
`Couldn't serialize document value: {}`.", - b, serde_msg + b, message ) } _ => write!(f, "The `{}` payload provided is malformed: `{}`.", b, me), From 20589a41b5fa0590b6a9511e01ef5fed3e46000d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaras=C5=82a=C5=AD=20Viktor=C4=8Dyk?= Date: Sat, 1 Oct 2022 21:59:20 +0200 Subject: [PATCH 39/44] Rename receivedDocumentIds into matchedDocuments Changes DocumentDeletion task details response. --- meilisearch-http/src/task.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/task.rs b/meilisearch-http/src/task.rs index fe23720aa..786d318f8 100644 --- a/meilisearch-http/src/task.rs +++ b/meilisearch-http/src/task.rs @@ -147,7 +147,7 @@ enum TaskDetails { IndexInfo { primary_key: Option }, #[serde(rename_all = "camelCase")] DocumentDeletion { - received_document_ids: usize, + matched_documents: usize, deleted_documents: Option, }, #[serde(rename_all = "camelCase")] @@ -255,7 +255,7 @@ impl From for TaskView { } => ( TaskType::DocumentDeletion, Some(TaskDetails::DocumentDeletion { - received_document_ids: ids.len(), + matched_documents: ids.len(), deleted_documents: None, }), ), From 459829631f99f643807fca9b8be95cb12fe8e160 Mon Sep 17 00:00:00 2001 From: nont Date: Sat, 1 Oct 2022 18:06:09 -0700 Subject: [PATCH 40/44] Upgrade to alpine 3.16 --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index ad21329fc..0e54fcdae 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Compile -FROM rust:alpine3.14 AS compiler +FROM rust:alpine3.16 AS compiler RUN apk add -q --update-cache --no-cache build-base openssl-dev @@ -19,7 +19,7 @@ RUN set -eux; \ cargo build --release # Run -FROM alpine:3.14 +FROM alpine:3.16 ENV MEILI_HTTP_ADDR 0.0.0.0:7700 ENV MEILI_SERVER_PROVIDER docker From 88e69f4302dc144b7338902da61d429842248a89 Mon Sep 17 00:00:00 2001 From: arriven <20084245+Arriven@users.noreply.github.com> Date: Sun, 2 Oct 2022 17:53:08 +0300 
Subject: [PATCH 41/44] Increase max concurrent readers on indexes --- meilisearch-lib/src/index/dump.rs | 1 + meilisearch-lib/src/index/index.rs | 1 + meilisearch-lib/src/snapshot.rs | 1 + 3 files changed, 3 insertions(+) diff --git a/meilisearch-lib/src/index/dump.rs b/meilisearch-lib/src/index/dump.rs index 6a41fa7a0..9cc3c033f 100644 --- a/meilisearch-lib/src/index/dump.rs +++ b/meilisearch-lib/src/index/dump.rs @@ -105,6 +105,7 @@ impl Index { let mut options = EnvOpenOptions::new(); options.map_size(size); + options.max_readers(1024); let index = milli::Index::new(options, &dst_dir_path)?; let mut txn = index.write_txn()?; diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index 02425d0bf..3d6c47949 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -94,6 +94,7 @@ impl Index { create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); + options.max_readers(1024); let inner = Arc::new(milli::Index::new(options, &path)?); Ok(Index { inner, diff --git a/meilisearch-lib/src/snapshot.rs b/meilisearch-lib/src/snapshot.rs index da4907939..4566a627e 100644 --- a/meilisearch-lib/src/snapshot.rs +++ b/meilisearch-lib/src/snapshot.rs @@ -181,6 +181,7 @@ impl SnapshotJob { let mut options = milli::heed::EnvOpenOptions::new(); options.map_size(self.index_size); + options.max_readers(1024); let index = milli::Index::new(options, entry.path())?; index.copy_to_path(dst, CompactionOption::Enabled)?; } From 135f656e8f70e6c949e1452f751471a355fba845 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 3 Oct 2022 10:39:42 +0200 Subject: [PATCH 42/44] Make clippy happy --- meilisearch-lib/src/tasks/task_store/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-lib/src/tasks/task_store/mod.rs b/meilisearch-lib/src/tasks/task_store/mod.rs index 621d66dd3..55dfe17d3 100644 --- a/meilisearch-lib/src/tasks/task_store/mod.rs +++ 
b/meilisearch-lib/src/tasks/task_store/mod.rs @@ -117,7 +117,7 @@ impl TaskStore { match filter { Some(filter) => filter .pass(&task) - .then(|| task) + .then_some(task) .ok_or(TaskError::UnexistingTask(id)), None => Ok(task), } From f7f34fb7147d8f2a97e8b4d76f591ef22ad045aa Mon Sep 17 00:00:00 2001 From: Himanshu Malviya Date: Mon, 3 Oct 2022 11:04:54 +0000 Subject: [PATCH 43/44] deleted v1.rs --- meilisearch-lib/src/dump/loaders/v1.rs | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 meilisearch-lib/src/dump/loaders/v1.rs diff --git a/meilisearch-lib/src/dump/loaders/v1.rs b/meilisearch-lib/src/dump/loaders/v1.rs deleted file mode 100644 index a07475b56..000000000 --- a/meilisearch-lib/src/dump/loaders/v1.rs +++ /dev/null @@ -1,24 +0,0 @@ -use std::path::Path; - -use serde::{Deserialize, Serialize}; - -use crate::index_controller::IndexMetadata; - -#[derive(Serialize, Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -pub struct MetadataV1 { - pub db_version: String, - indexes: Vec, -} - -impl MetadataV1 { - #[allow(dead_code, unreachable_code, unused_variables)] - pub fn load_dump( - self, - src: impl AsRef, - dst: impl AsRef, - size: usize, - indexer_options: &IndexerOpts, - ) -> anyhow::Result<()> { - anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") -} From 62b06e60880bc717fe6f28620decdd8c733d0e9a Mon Sep 17 00:00:00 2001 From: Anirudh Dayanand Date: Tue, 4 Oct 2022 13:39:40 +0530 Subject: [PATCH 44/44] Fixed broken Link --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8ac897e45..eb57a9565 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,7 +22,7 @@ If Meilisearch does not offer optimized support for your language, please consid It's [Hacktoberfest month](https://hacktoberfest.com)! 
🥳 Thanks so much for participating with Meilisearch this year! -1. We will follow the quality standards set by the organizers of Hacktoberfest (see detail on their [website](https://hacktoberfest.digitalocean.com/resources/qualitystandards)). Our reviewers will not consider any PR that doesn’t match that standard. +1. We will follow the quality standards set by the organizers of Hacktoberfest (see detail on their [website](https://hacktoberfest.com/participation/#spam)). Our reviewers will not consider any PR that doesn’t match that standard. 2. PRs reviews will take place from Monday to Thursday, during usual working hours, CEST time. If you submit outside of these hours, there’s no need to panic; we will get around to your contribution. 3. There will be no issue assignment as we don’t want people to ask to be assigned specific issues and never return, discouraging the volunteer contributors from opening a PR to fix this issue. We take the liberty to choose the PR that best fixes the issue, so we encourage you to get to it as soon as possible and do your best!