diff --git a/.github/workflows/bench-manual.yml b/.github/workflows/bench-manual.yml index 6d8c3a006..4a9d5fcfd 100644 --- a/.github/workflows/bench-manual.yml +++ b/.github/workflows/bench-manual.yml @@ -18,11 +18,9 @@ jobs: timeout-minutes: 180 # 3h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true - name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }} run: | diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index 36af79460..6379a528c 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -35,11 +35,9 @@ jobs: fetch-depth: 0 # fetch full history to be able to get main commit sha ref: ${{ steps.comment-branch.outputs.head_ref }} - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true - name: Run benchmarks on PR ${{ github.event.issue.id }} run: | diff --git a/.github/workflows/bench-push-indexing.yml b/.github/workflows/bench-push-indexing.yml index fd0f19a5a..dfd1a3b09 100644 --- a/.github/workflows/bench-push-indexing.yml +++ b/.github/workflows/bench-push-indexing.yml @@ -12,11 +12,9 @@ jobs: timeout-minutes: 180 # 3h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Run benchmarks - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }} diff --git a/.github/workflows/benchmarks-manual.yml b/.github/workflows/benchmarks-manual.yml index b967eb073..19d477268 100644 --- a/.github/workflows/benchmarks-manual.yml +++ b/.github/workflows/benchmarks-manual.yml @@ -18,11 +18,9 @@ jobs: timeout-minutes: 4320 # 72h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: 
helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/benchmarks-pr.yml b/.github/workflows/benchmarks-pr.yml index 30baa294e..6a613dcb9 100644 --- a/.github/workflows/benchmarks-pr.yml +++ b/.github/workflows/benchmarks-pr.yml @@ -13,11 +13,9 @@ jobs: runs-on: benchmarks timeout-minutes: 4320 # 72h steps: - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true - name: Check for Command id: command diff --git a/.github/workflows/benchmarks-push-indexing.yml b/.github/workflows/benchmarks-push-indexing.yml index a966570e6..ae6a4634a 100644 --- a/.github/workflows/benchmarks-push-indexing.yml +++ b/.github/workflows/benchmarks-push-indexing.yml @@ -16,11 +16,9 @@ jobs: timeout-minutes: 4320 # 72h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/benchmarks-push-search-geo.yml b/.github/workflows/benchmarks-push-search-geo.yml index 1b5cacfd1..8f5f8d020 100644 --- a/.github/workflows/benchmarks-push-search-geo.yml +++ b/.github/workflows/benchmarks-push-search-geo.yml @@ -15,11 +15,9 @@ jobs: runs-on: benchmarks steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/benchmarks-push-search-songs.yml b/.github/workflows/benchmarks-push-search-songs.yml index 02cd10472..a19990e07 100644 --- a/.github/workflows/benchmarks-push-search-songs.yml +++ b/.github/workflows/benchmarks-push-search-songs.yml @@ -15,11 +15,9 @@ jobs: runs-on: benchmarks steps: - uses: actions/checkout@v3 - - uses: 
actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/benchmarks-push-search-wiki.yml b/.github/workflows/benchmarks-push-search-wiki.yml index 455aaa95d..f7da07fda 100644 --- a/.github/workflows/benchmarks-push-search-wiki.yml +++ b/.github/workflows/benchmarks-push-search-wiki.yml @@ -15,11 +15,9 @@ jobs: runs-on: benchmarks steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Set variables - name: Set current branch name diff --git a/.github/workflows/flaky-tests.yml b/.github/workflows/flaky-tests.yml index c7e81aacc..d66417c45 100644 --- a/.github/workflows/flaky-tests.yml +++ b/.github/workflows/flaky-tests.yml @@ -16,10 +16,7 @@ jobs: run: | apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: helix-editor/rust-toolchain@v1 - name: Install cargo-flaky run: cargo install cargo-flaky - name: Run cargo flaky in the dumps diff --git a/.github/workflows/fuzzer-indexing.yml b/.github/workflows/fuzzer-indexing.yml index 1d01a6ea5..5d1ecc7f8 100644 --- a/.github/workflows/fuzzer-indexing.yml +++ b/.github/workflows/fuzzer-indexing.yml @@ -12,11 +12,9 @@ jobs: timeout-minutes: 4320 # 72h steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true # Run benchmarks - name: Run the fuzzer diff --git a/.github/workflows/publish-apt-brew-pkg.yml b/.github/workflows/publish-apt-brew-pkg.yml index 11893bae0..91b3ecfba 100644 --- a/.github/workflows/publish-apt-brew-pkg.yml +++ b/.github/workflows/publish-apt-brew-pkg.yml @@ -25,10 +25,7 @@ jobs: run: | apt-get update && apt-get install -y 
curl apt-get install build-essential -y - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: helix-editor/rust-toolchain@v1 - name: Install cargo-deb run: cargo install cargo-deb - uses: actions/checkout@v3 diff --git a/.github/workflows/publish-binaries.yml b/.github/workflows/publish-binaries.yml index 2372ce497..4f475057f 100644 --- a/.github/workflows/publish-binaries.yml +++ b/.github/workflows/publish-binaries.yml @@ -45,10 +45,7 @@ jobs: run: | apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: helix-editor/rust-toolchain@v1 - name: Build run: cargo build --release --locked # No need to upload binaries for dry run (cron) @@ -78,10 +75,7 @@ jobs: asset_name: meilisearch-windows-amd64.exe steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: helix-editor/rust-toolchain@v1 - name: Build run: cargo build --release --locked # No need to upload binaries for dry run (cron) @@ -107,12 +101,10 @@ jobs: - name: Checkout repository uses: actions/checkout@v3 - name: Installing Rust toolchain - uses: actions-rs/toolchain@v1 + uses: helix-editor/rust-toolchain@v1 with: - toolchain: stable profile: minimal target: ${{ matrix.target }} - override: true - name: Cargo build uses: actions-rs/cargo@v1 with: @@ -154,12 +146,10 @@ jobs: add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable" apt-get update -y && apt-get install -y docker-ce - name: Installing Rust toolchain - uses: actions-rs/toolchain@v1 + uses: helix-editor/rust-toolchain@v1 with: - toolchain: stable profile: minimal target: ${{ matrix.target }} - override: true - name: Configure target aarch64 GNU ## Environment variable is not passed using env: ## LD gold won't work with MUSL diff --git a/.github/workflows/test-suite.yml 
b/.github/workflows/test-suite.yml index 84a82250e..7a07997ae 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -31,10 +31,7 @@ jobs: apt-get update && apt-get install -y curl apt-get install build-essential -y - name: Setup test with Rust stable - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + uses: helix-editor/rust-toolchain@v1 - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 - name: Run cargo check without any default features @@ -59,10 +56,7 @@ jobs: - uses: actions/checkout@v3 - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: helix-editor/rust-toolchain@v1 - name: Run cargo check without any default features uses: actions-rs/cargo@v1 with: @@ -87,10 +81,7 @@ jobs: run: | apt-get update apt-get install --assume-yes build-essential curl - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: helix-editor/rust-toolchain@v1 - name: Run cargo build with almost all features run: | cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)" @@ -110,10 +101,7 @@ jobs: run: | apt-get update apt-get install --assume-yes build-essential curl - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: helix-editor/rust-toolchain@v1 - name: Run cargo tree without default features and check lindera is not present run: | if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then @@ -137,10 +125,7 @@ jobs: run: | apt-get update && apt-get install -y curl apt-get install build-essential -y - - uses: actions-rs/toolchain@v1 - with: - toolchain: stable - override: true + - uses: helix-editor/rust-toolchain@v1 - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 - name: Run tests in debug @@ -154,11 +139,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: 
actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: 1.75.0 - override: true components: clippy - name: Cache dependencies uses: Swatinem/rust-cache@v2.7.1 @@ -173,10 +156,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: nightly + toolchain: nightly-2024-06-25 override: true components: rustfmt - name: Cache dependencies diff --git a/.github/workflows/update-cargo-toml-version.yml b/.github/workflows/update-cargo-toml-version.yml index 51ab6d1ab..8b6d0a2d2 100644 --- a/.github/workflows/update-cargo-toml-version.yml +++ b/.github/workflows/update-cargo-toml-version.yml @@ -18,11 +18,9 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: helix-editor/rust-toolchain@v1 with: profile: minimal - toolchain: stable - override: true - name: Install sd run: cargo install sd - name: Update Cargo.toml file diff --git a/Cargo.lock b/Cargo.lock index 3c728f348..156e3d146 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6080,12 +6080,13 @@ dependencies = [ [[package]] name = "yaup" -version = "0.2.1" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a59e7d27bed43f7c37c25df5192ea9d435a8092a902e02203359ac9ce3e429d9" +checksum = "b0144f1a16a199846cb21024da74edd930b43443463292f536b7110b4855b5c6" dependencies = [ + "form_urlencoded", "serde", - "url", + "thiserror", ] [[package]] diff --git a/README.md b/README.md index 540a2c92b..d806ed963 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@

⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍

-[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow. +[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.

@@ -36,11 +36,18 @@

-🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥 +## 🖥 Examples + +- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos). +- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination. +- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million songs. +- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application. + +See the list of all our example apps in our [demos repository](https://github.com/meilisearch/demos). 
## ✨ Features -- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results -- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience +- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results +- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience - **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need @@ -59,7 +66,7 @@ You can consult Meilisearch's documentation at [meilisearch.com/docs](https://ww ## 🚀 Getting started -For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick 
Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide. +For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [documentation](https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide. ## 🌍 Supercharge your Meilisearch experience @@ -83,7 +90,7 @@ Finally, for more in-depth information, refer to our articles explaining fundame ## 📊 Telemetry -Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want. +Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want. To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data. @@ -105,11 +112,11 @@ Thank you for your support! ## 👩‍💻 Contributing -Meilisearch is, and will always be, open-source! If you want to contribute to the project, please take a look at [our contribution guidelines](CONTRIBUTING.md). +Meilisearch is, and will always be, open-source! If you want to contribute to the project, please look at [our contribution guidelines](CONTRIBUTING.md). ## 📦 Versioning -Meilisearch releases and their associated binaries are available [in this GitHub page](https://github.com/meilisearch/meilisearch/releases). 
+Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases). The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md). diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 213ec3230..0b98cc22a 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1811,7 +1811,7 @@ mod tests { task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose. enable_mdb_writemap: false, - index_growth_amount: 1000 * 1000, // 1 MB + index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB index_count: 5, indexer_config, autobatching_enabled: true, diff --git a/meilisearch-auth/src/lib.rs b/meilisearch-auth/src/lib.rs index e74f1707c..4dbf1bf6f 100644 --- a/meilisearch-auth/src/lib.rs +++ b/meilisearch-auth/src/lib.rs @@ -188,6 +188,12 @@ impl AuthFilter { self.allow_index_creation && self.is_index_authorized(index) } + #[inline] + /// Return true if a tenant token was used to generate the search rules. + pub fn is_tenant_token(&self) -> bool { + self.search_rules.is_some() + } + pub fn with_allowed_indexes(allowed_indexes: HashSet) -> Self { Self { search_rules: None, @@ -205,6 +211,7 @@ impl AuthFilter { .unwrap_or(true) } + /// Check if the index is authorized by the API key and the tenant token. 
pub fn is_index_authorized(&self, index: &str) -> bool { self.key_authorized_indexes.is_index_authorized(index) && self @@ -214,6 +221,44 @@ impl AuthFilter { .unwrap_or(true) } + /// Only check if the index is authorized by the API key + pub fn api_key_is_index_authorized(&self, index: &str) -> bool { + self.key_authorized_indexes.is_index_authorized(index) + } + + /// Only check if the index is authorized by the tenant token + pub fn tenant_token_is_index_authorized(&self, index: &str) -> bool { + self.search_rules + .as_ref() + .map(|search_rules| search_rules.is_index_authorized(index)) + .unwrap_or(true) + } + + /// Return the list of authorized indexes by the tenant token if any + pub fn tenant_token_list_index_authorized(&self) -> Vec { + match self.search_rules { + Some(ref search_rules) => { + let mut indexes: Vec<_> = match search_rules { + SearchRules::Set(set) => set.iter().map(|s| s.to_string()).collect(), + SearchRules::Map(map) => map.keys().map(|s| s.to_string()).collect(), + }; + indexes.sort_unstable(); + indexes + } + None => Vec::new(), + } + } + + /// Return the list of authorized indexes by the api key if any + pub fn api_key_list_index_authorized(&self) -> Vec { + let mut indexes: Vec<_> = match self.key_authorized_indexes { + SearchRules::Set(ref set) => set.iter().map(|s| s.to_string()).collect(), + SearchRules::Map(ref map) => map.keys().map(|s| s.to_string()).collect(), + }; + indexes.sort_unstable(); + indexes + } + pub fn get_index_search_rules(&self, index: &str) -> Option { if !self.is_index_authorized(index) { return None; diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index f840ceb7e..bae283137 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -54,6 +54,8 @@ chinese-pinyin = ["milli/chinese-pinyin"] hebrew = ["milli/hebrew"] # japanese specialized tokenization japanese = ["milli/japanese"] +# korean specialized tokenization +korean = ["milli/korean"] # thai specialized 
tokenization thai = ["milli/thai"] # allow greek specialized tokenization diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 75571b535..ce73ebdcf 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -98,7 +98,6 @@ tokio-stream = "0.1.14" toml = "0.8.8" uuid = { version = "1.6.1", features = ["serde", "v4"] } walkdir = "2.4.0" -yaup = "0.2.1" serde_urlencoded = "0.7.1" termcolor = "1.4.1" url = { version = "2.5.0", features = ["serde"] } @@ -118,7 +117,7 @@ maplit = "1.0.2" meili-snap = { path = "../meili-snap" } temp-env = "0.3.6" urlencoding = "2.1.3" -yaup = "0.2.1" +yaup = "0.3.1" [build-dependencies] anyhow = { version = "1.0.79", optional = true } @@ -151,6 +150,7 @@ chinese = ["meilisearch-types/chinese"] chinese-pinyin = ["meilisearch-types/chinese-pinyin"] hebrew = ["meilisearch-types/hebrew"] japanese = ["meilisearch-types/japanese"] +korean = ["meilisearch-types/korean"] thai = ["meilisearch-types/thai"] greek = ["meilisearch-types/greek"] khmer = ["meilisearch-types/khmer"] diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 5a0b04020..96496a33f 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -98,14 +98,29 @@ impl From for aweb::Error { impl From for MeilisearchHttpError { fn from(error: aweb::error::PayloadError) -> Self { - MeilisearchHttpError::Payload(PayloadError::Payload(error)) + match error { + aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload( + PayloadError::Payload(ActixPayloadError::IncompleteError), + ), + _ => MeilisearchHttpError::Payload(PayloadError::Payload( + ActixPayloadError::OtherError(error), + )), + } } } +#[derive(Debug, thiserror::Error)] +pub enum ActixPayloadError { + #[error("The provided payload is incomplete and cannot be parsed")] + IncompleteError, + #[error(transparent)] + OtherError(aweb::error::PayloadError), +} + #[derive(Debug, thiserror::Error)] pub enum PayloadError { #[error(transparent)] - 
Payload(aweb::error::PayloadError), + Payload(ActixPayloadError), #[error(transparent)] Json(JsonPayloadError), #[error(transparent)] @@ -122,13 +137,15 @@ impl ErrorCode for PayloadError { fn error_code(&self) -> Code { match self { PayloadError::Payload(e) => match e { - aweb::error::PayloadError::Incomplete(_) => Code::Internal, - aweb::error::PayloadError::EncodingCorrupted => Code::Internal, - aweb::error::PayloadError::Overflow => Code::PayloadTooLarge, - aweb::error::PayloadError::UnknownLength => Code::Internal, - aweb::error::PayloadError::Http2Payload(_) => Code::Internal, - aweb::error::PayloadError::Io(_) => Code::Internal, - _ => todo!(), + ActixPayloadError::IncompleteError => Code::BadRequest, + ActixPayloadError::OtherError(error) => match error { + aweb::error::PayloadError::EncodingCorrupted => Code::Internal, + aweb::error::PayloadError::Overflow => Code::PayloadTooLarge, + aweb::error::PayloadError::UnknownLength => Code::Internal, + aweb::error::PayloadError::Http2Payload(_) => Code::Internal, + aweb::error::PayloadError::Io(_) => Code::Internal, + _ => todo!(), + }, }, PayloadError::Json(err) => match err { JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge, diff --git a/meilisearch/src/extractors/authentication/mod.rs b/meilisearch/src/extractors/authentication/mod.rs index 007e2be40..28a6d770e 100644 --- a/meilisearch/src/extractors/authentication/mod.rs +++ b/meilisearch/src/extractors/authentication/mod.rs @@ -12,6 +12,8 @@ use futures::Future; use meilisearch_auth::{AuthController, AuthFilter}; use meilisearch_types::error::{Code, ResponseError}; +use self::policies::AuthError; + pub struct GuardedData { data: D, filters: AuthFilter, @@ -35,12 +37,12 @@ impl GuardedData { let missing_master_key = auth.get_master_key().is_none(); match Self::authenticate(auth, token, index).await? 
{ - Some(filters) => match data { + Ok(filters) => match data { Some(data) => Ok(Self { data, filters, _marker: PhantomData }), None => Err(AuthenticationError::IrretrievableState.into()), }, - None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), - None => Err(AuthenticationError::InvalidToken.into()), + Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), + Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)), } } @@ -51,12 +53,12 @@ impl GuardedData { let missing_master_key = auth.get_master_key().is_none(); match Self::authenticate(auth, String::new(), None).await? { - Some(filters) => match data { + Ok(filters) => match data { Some(data) => Ok(Self { data, filters, _marker: PhantomData }), None => Err(AuthenticationError::IrretrievableState.into()), }, - None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), - None => Err(AuthenticationError::MissingAuthorizationHeader.into()), + Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()), + Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()), } } @@ -64,7 +66,7 @@ impl GuardedData { auth: Data, token: String, index: Option, - ) -> Result, ResponseError> + ) -> Result, ResponseError> where P: Policy + 'static, { @@ -127,13 +129,14 @@ pub trait Policy { auth: Data, token: &str, index: Option<&str>, - ) -> Option; + ) -> Result; } pub mod policies { use actix_web::web::Data; use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation}; use meilisearch_auth::{AuthController, AuthFilter, SearchRules}; + use meilisearch_types::error::{Code, ErrorCode}; // reexport actions in policies in order to be used in routes configuration. 
pub use meilisearch_types::keys::{actions, Action}; use serde::{Deserialize, Serialize}; @@ -144,11 +147,53 @@ pub mod policies { enum TenantTokenOutcome { NotATenantToken, - Invalid, - Expired, Valid(Uuid, SearchRules), } + #[derive(thiserror::Error, Debug)] + pub enum AuthError { + #[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")] + ExpiredTenantToken { exp: i64, now: i64 }, + #[error("The provided API key is invalid.")] + InvalidApiKey, + #[error("The provided tenant token cannot access the index `{index}`, allowed indexes are {allowed:?}.")] + TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec }, + #[error( + "The API key used to generate this tenant token cannot access the index `{index}`." + )] + TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String }, + #[error( + "The API key cannot access the index `{index}`, authorized indexes are {allowed:?}." + )] + ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec }, + #[error("The provided tenant token is invalid.")] + InvalidTenantToken, + #[error("Could not decode tenant token, {0}.")] + CouldNotDecodeTenantToken(jsonwebtoken::errors::Error), + #[error("Invalid action `{0}`.")] + InternalInvalidAction(u8), + } + + impl From for AuthError { + fn from(error: jsonwebtoken::errors::Error) -> Self { + use jsonwebtoken::errors::ErrorKind; + + match error.kind() { + ErrorKind::InvalidToken => AuthError::InvalidTenantToken, + _ => AuthError::CouldNotDecodeTenantToken(error), + } + } + } + + impl ErrorCode for AuthError { + fn error_code(&self) -> Code { + match self { + AuthError::InternalInvalidAction(_) => Code::Internal, + _ => Code::InvalidApiKey, + } + } + } + fn tenant_token_validation() -> Validation { let mut validation = Validation::default(); validation.validate_exp = false; @@ -158,15 +203,15 @@ pub mod policies { } /// Extracts the key id used to sign the payload, without performing any validation. 
- fn extract_key_id(token: &str) -> Option { + fn extract_key_id(token: &str) -> Result { let mut validation = tenant_token_validation(); validation.insecure_disable_signature_validation(); let dummy_key = DecodingKey::from_secret(b"secret"); - let token_data = decode::(token, &dummy_key, &validation).ok()?; + let token_data = decode::(token, &dummy_key, &validation)?; // get token fields without validating it. let Claims { api_key_uid, .. } = token_data.claims; - Some(api_key_uid) + Ok(api_key_uid) } fn is_keys_action(action: u8) -> bool { @@ -187,76 +232,102 @@ pub mod policies { auth: Data, token: &str, index: Option<&str>, - ) -> Option { + ) -> Result { // authenticate if token is the master key. // Without a master key, all routes are accessible except the key-related routes. if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) { - return Some(AuthFilter::default()); + return Ok(AuthFilter::default()); } let (key_uuid, search_rules) = match ActionPolicy::::authenticate_tenant_token(&auth, token) { - TenantTokenOutcome::Valid(key_uuid, search_rules) => { + Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => { (key_uuid, Some(search_rules)) } - TenantTokenOutcome::Expired => return None, - TenantTokenOutcome::Invalid => return None, - TenantTokenOutcome::NotATenantToken => { - (auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None) - } + Ok(TenantTokenOutcome::NotATenantToken) + | Err(AuthError::InvalidTenantToken) => ( + auth.get_optional_uid_from_encoded_key(token.as_bytes()) + .map_err(|_e| AuthError::InvalidApiKey)? 
+ .ok_or(AuthError::InvalidApiKey)?, + None, + ), + Err(e) => return Err(e), }; // check that the indexes are allowed - let action = Action::from_repr(A)?; - let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?; - if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) - && index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true) - { - return Some(auth_filter); + let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?; + let auth_filter = auth + .get_key_filters(key_uuid, search_rules) + .map_err(|_e| AuthError::InvalidApiKey)?; + + // First check if the index is authorized in the tenant token, this is a public + // information, we can return a nice error message. + if let Some(index) = index { + if !auth_filter.tenant_token_is_index_authorized(index) { + return Err(AuthError::TenantTokenAccessingnUnauthorizedIndex { + index: index.to_string(), + allowed: auth_filter.tenant_token_list_index_authorized(), + }); + } + if !auth_filter.api_key_is_index_authorized(index) { + if auth_filter.is_tenant_token() { + // If the error comes from a tenant token we cannot share the list + // of authorized indexes in the API key. This is not public information. + return Err(AuthError::TenantTokenApiKeyAccessingnUnauthorizedIndex { + index: index.to_string(), + }); + } else { + // Otherwise we can share the list + // of authorized indexes in the API key. + return Err(AuthError::ApiKeyAccessingnUnauthorizedIndex { + index: index.to_string(), + allowed: auth_filter.api_key_list_index_authorized(), + }); + } + } + } + if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) { + return Ok(auth_filter); } - None + Err(AuthError::InvalidApiKey) } } impl ActionPolicy { - fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome { + fn authenticate_tenant_token( + auth: &AuthController, + token: &str, + ) -> Result { // Only search action can be accessed by a tenant token. 
if A != actions::SEARCH { - return TenantTokenOutcome::NotATenantToken; + return Ok(TenantTokenOutcome::NotATenantToken); } - let uid = if let Some(uid) = extract_key_id(token) { - uid - } else { - return TenantTokenOutcome::NotATenantToken; - }; + let uid = extract_key_id(token)?; // Check if tenant token is valid. let key = if let Some(key) = auth.generate_key(uid) { key } else { - return TenantTokenOutcome::Invalid; + return Err(AuthError::InvalidTenantToken); }; - let data = if let Ok(data) = decode::( + let data = decode::( token, &DecodingKey::from_secret(key.as_bytes()), &tenant_token_validation(), - ) { - data - } else { - return TenantTokenOutcome::Invalid; - }; + )?; // Check if token is expired. if let Some(exp) = data.claims.exp { - if OffsetDateTime::now_utc().unix_timestamp() > exp { - return TenantTokenOutcome::Expired; + let now = OffsetDateTime::now_utc().unix_timestamp(); + if now > exp { + return Err(AuthError::ExpiredTenantToken { exp, now }); } } - TenantTokenOutcome::Valid(uid, data.claims.search_rules) + Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules)) } } diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 61df1827d..a3a4b48a3 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -1331,13 +1331,23 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) { // TODO: TAMO: milli encountered an internal error, what do we want to do? 
let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()]; let geo_point = &document.get("_geo").unwrap_or(&json!(null)); - if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) { + if let Some((lat, lng)) = + extract_geo_value(&geo_point["lat"]).zip(extract_geo_value(&geo_point["lng"])) + { let distance = milli::distance_between_two_points(&base, &[lat, lng]); document.insert("_geoDistance".to_string(), json!(distance.round() as usize)); } } } +fn extract_geo_value(value: &Value) -> Option { + match value { + Value::Number(n) => n.as_f64(), + Value::String(s) => s.parse().ok(), + _ => None, + } +} + fn compute_formatted_options( attr_to_highlight: &HashSet, attr_to_crop: &[String], @@ -1711,4 +1721,54 @@ mod test { insert_geo_distance(sorters, &mut document); assert_eq!(document.get("_geoDistance"), None); } + + #[test] + fn test_insert_geo_distance_with_coords_as_string() { + let value: Document = serde_json::from_str( + r#"{ + "_geo": { + "lat": "50", + "lng": 3 + } + }"#, + ) + .unwrap(); + + let sorters = &["_geoPoint(50,3):desc".to_string()]; + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + + let value: Document = serde_json::from_str( + r#"{ + "_geo": { + "lat": "50", + "lng": "3" + }, + "id": "1" + }"#, + ) + .unwrap(); + + let sorters = &["_geoPoint(50,3):desc".to_string()]; + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + + let value: Document = serde_json::from_str( + r#"{ + "_geo": { + "lat": 50, + "lng": "3" + }, + "id": "1" + }"#, + ) + .unwrap(); + + let sorters = &["_geoPoint(50,3):desc".to_string()]; + let mut document = value.clone(); + insert_geo_distance(sorters, &mut document); + assert_eq!(document.get("_geoDistance"), Some(&json!(0))); + } } diff --git a/meilisearch/tests/auth/authorization.rs 
b/meilisearch/tests/auth/authorization.rs index d26bb26b8..609b7d01b 100644 --- a/meilisearch/tests/auth/authorization.rs +++ b/meilisearch/tests/auth/authorization.rs @@ -78,7 +78,7 @@ pub static ALL_ACTIONS: Lazy> = Lazy::new(|| { }); static INVALID_RESPONSE: Lazy = Lazy::new(|| { - json!({"message": "The provided API key is invalid.", + json!({"message": null, "code": "invalid_api_key", "type": "auth", "link": "https://docs.meilisearch.com/errors#invalid_api_key" @@ -119,7 +119,8 @@ async fn error_access_expired_key() { thread::sleep(time::Duration::new(1, 0)); for (method, route) in AUTHORIZATIONS.keys() { - let (response, code) = server.dummy_request(method, route).await; + let (mut response, code) = server.dummy_request(method, route).await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); @@ -149,7 +150,8 @@ async fn error_access_unauthorized_index() { // filter `products` index routes .filter(|(_, route)| route.starts_with("/indexes/products")) { - let (response, code) = server.dummy_request(method, route).await; + let (mut response, code) = server.dummy_request(method, route).await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); @@ -176,7 +178,8 @@ async fn error_access_unauthorized_action() { let key = response["key"].as_str().unwrap(); server.use_api_key(key); - let (response, code) = server.dummy_request(method, route).await; + let (mut response, code) = server.dummy_request(method, route).await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route); assert_eq!(403, code, "{:?}", &response); @@ -280,7 +283,7 @@ async fn access_authorized_no_index_restriction() { route, action ); - assert_ne!(code, 403); + 
assert_ne!(code, 403, "on route: {:?} - {:?} with action: {:?}", method, route, action); } } } diff --git a/meilisearch/tests/auth/errors.rs b/meilisearch/tests/auth/errors.rs index 581243a0a..466eefe65 100644 --- a/meilisearch/tests/auth/errors.rs +++ b/meilisearch/tests/auth/errors.rs @@ -1,7 +1,10 @@ +use actix_web::test; +use http::StatusCode; +use jsonwebtoken::{EncodingKey, Header}; use meili_snap::*; use uuid::Uuid; -use crate::common::Server; +use crate::common::{Server, Value}; use crate::json; #[actix_rt::test] @@ -436,3 +439,262 @@ async fn patch_api_keys_unknown_field() { } "###); } + +async fn send_request_with_custom_auth( + app: impl actix_web::dev::Service< + actix_http::Request, + Response = actix_web::dev::ServiceResponse, + Error = actix_web::Error, + >, + url: &str, + auth: &str, +) -> (Value, StatusCode) { + let req = test::TestRequest::get().uri(url).insert_header(("Authorization", auth)).to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + + (response, status_code) +} + +#[actix_rt::test] +async fn invalid_auth_format() { + let server = Server::new_auth().await; + let app = server.init_web_app().await; + + let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(status_code, @"401 Unauthorized"); + snapshot!(response, @r###" + { + "message": "The Authorization header is missing. 
It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + "###); + + let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(status_code, @"401 Unauthorized"); + snapshot!(response, @r###" + { + "message": "The Authorization header is missing. It must use the bearer authorization method.", + "code": "missing_authorization_header", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#missing_authorization_header" + } + "###); + + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer").await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); +} + +#[actix_rt::test] +async fn invalid_api_key() { + let server = Server::new_auth().await; + let app = server.init_web_app().await; + + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/search", "Bearer kefir").await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + let uuid = Uuid::nil(); + let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() }); + let req = test::TestRequest::post() + .uri("/keys") + .insert_header(("Authorization", "Bearer MASTER_KEY")) + .set_json(&key) + .to_request(); + let res = test::call_service(&app, 
req).await; + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###" + { + "name": null, + "description": null, + "key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9", + "uid": "00000000-0000-0000-0000-000000000000", + "actions": [ + "search" + ], + "indexes": [ + "dog" + ], + "expiresAt": null, + "createdAt": "[date]", + "updatedAt": "[date]" + } + "###); + let key = response["key"].as_str().unwrap(); + + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {key}")) + .await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The API key cannot acces the index `doggo`, authorized indexes are [\"dog\"].", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); +} + +#[actix_rt::test] +async fn invalid_tenant_token() { + let server = Server::new_auth().await; + let app = server.init_web_app().await; + + // The tenant token won't be recognized at all if we're not on a search route + let claims = json!({ "tamo": "kefir" }); + let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo")) + .unwrap(); + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/documents", &format!("Bearer {jwt}")) + .await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The provided API key is invalid.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + let claims = json!({ "tamo": "kefir" }); + let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo")) + .unwrap(); + let (response, status_code) = + send_request_with_custom_auth(&app, 
"/indexes/dog/search", &format!("Bearer {jwt}")).await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + // The error messages are not ideal but that's expected since we cannot _yet_ use deserr + let claims = json!({ "searchRules": "kefir" }); + let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo")) + .unwrap(); + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + let uuid = Uuid::nil(); + let claims = json!({ "searchRules": ["kefir"], "apiKeyUid": uuid.to_string() }); + let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo")) + .unwrap(); + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "Could not decode tenant token, InvalidSignature.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + // ~~ For the next tests we first need a valid API key + let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() }); + let req = test::TestRequest::post() + .uri("/keys") + .insert_header(("Authorization", "Bearer MASTER_KEY")) + .set_json(&key) + 
.to_request(); + let res = test::call_service(&app, req).await; + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###" + { + "name": null, + "description": null, + "key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9", + "uid": "00000000-0000-0000-0000-000000000000", + "actions": [ + "search" + ], + "indexes": [ + "dog" + ], + "expiresAt": null, + "createdAt": "[date]", + "updatedAt": "[date]" + } + "###); + let key = response["key"].as_str().unwrap(); + + let claims = json!({ "searchRules": ["doggo", "catto"], "apiKeyUid": uuid.to_string() }); + let jwt = jsonwebtoken::encode( + &Header::default(), + &claims, + &EncodingKey::from_secret(key.as_bytes()), + ) + .unwrap(); + // Try to access an index that is not authorized by the tenant token + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The provided tenant token cannot acces the index `dog`, allowed indexes are [\"catto\", \"doggo\"].", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); + + // Try to access an index that *is* authorized by the tenant token but not by the api key used to generate the tt + let (response, status_code) = + send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {jwt}")) + .await; + snapshot!(status_code, @"403 Forbidden"); + snapshot!(response, @r###" + { + "message": "The API key used to generate this tenant token cannot acces the index `doggo`.", + "code": "invalid_api_key", + "type": "auth", + "link": "https://docs.meilisearch.com/errors#invalid_api_key" + } + "###); +} diff --git a/meilisearch/tests/auth/tenant_token.rs 
b/meilisearch/tests/auth/tenant_token.rs index ba3b0b234..5e8a75c36 100644 --- a/meilisearch/tests/auth/tenant_token.rs +++ b/meilisearch/tests/auth/tenant_token.rs @@ -53,7 +53,8 @@ static DOCUMENTS: Lazy = Lazy::new(|| { }); static INVALID_RESPONSE: Lazy = Lazy::new(|| { - json!({"message": "The provided API key is invalid.", + json!({ + "message": null, "code": "invalid_api_key", "type": "auth", "link": "https://docs.meilisearch.com/errors#invalid_api_key" @@ -191,7 +192,9 @@ macro_rules! compute_forbidden_search { server.use_api_key(&web_token); let index = server.index("sales"); index - .search(json!({}), |response, code| { + .search(json!({}), |mut response, code| { + // We don't assert anything on the message since it may change between cases + response["message"] = serde_json::json!(null); assert_eq!( response, INVALID_RESPONSE.clone(), @@ -495,7 +498,8 @@ async fn error_access_forbidden_routes() { for ((method, route), actions) in AUTHORIZATIONS.iter() { if !actions.contains("search") { - let (response, code) = server.dummy_request(method, route).await; + let (mut response, code) = server.dummy_request(method, route).await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } @@ -529,14 +533,16 @@ async fn error_access_expired_parent_key() { server.use_api_key(&web_token); // test search request while parent_key is not expired - let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; + let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await; + response["message"] = serde_json::json!(null); assert_ne!(response, INVALID_RESPONSE.clone()); assert_ne!(code, 403); // wait until the key is expired. 
thread::sleep(time::Duration::new(1, 0)); - let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; + let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } @@ -585,7 +591,8 @@ async fn error_access_modified_token() { .join("."); server.use_api_key(&altered_token); - let (response, code) = server.dummy_request("POST", "/indexes/products/search").await; + let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await; + response["message"] = serde_json::json!(null); assert_eq!(response, INVALID_RESPONSE.clone()); assert_eq!(code, 403); } diff --git a/meilisearch/tests/auth/tenant_token_multi_search.rs b/meilisearch/tests/auth/tenant_token_multi_search.rs index 09b5dbbcc..81146d14e 100644 --- a/meilisearch/tests/auth/tenant_token_multi_search.rs +++ b/meilisearch/tests/auth/tenant_token_multi_search.rs @@ -109,9 +109,11 @@ static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { fn invalid_response(query_index: Option) -> Value { let message = if let Some(query_index) = query_index { - format!("Inside `.queries[{query_index}]`: The provided API key is invalid.") + json!(format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")) } else { - "The provided API key is invalid.".to_string() + // if it's anything else we simply return null and will tests all the + // error messages somewhere else + json!(null) }; json!({"message": message, "code": "invalid_api_key", @@ -414,7 +416,10 @@ macro_rules! 
compute_forbidden_single_search { for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) { let web_token = generate_tenant_token(&uid, &key, tenant_token.clone()); server.use_api_key(&web_token); - let (response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await; + let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await; + if failed_query_index.is_none() && !response["message"].is_null() { + response["message"] = serde_json::json!(null); + } assert_eq!( response, invalid_response(failed_query_index), @@ -469,10 +474,13 @@ macro_rules! compute_forbidden_multiple_search { for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) { let web_token = generate_tenant_token(&uid, &key, tenant_token.clone()); server.use_api_key(&web_token); - let (response, code) = server.multi_search(json!({"queries" : [ + let (mut response, code) = server.multi_search(json!({"queries" : [ {"indexUid": "sales"}, {"indexUid": "products"}, ]})).await; + if failed_query_index.is_none() && !response["message"].is_null() { + response["message"] = serde_json::json!(null); + } assert_eq!( response, invalid_response(failed_query_index), @@ -1073,18 +1081,20 @@ async fn error_access_expired_parent_key() { server.use_api_key(&web_token); // test search request while parent_key is not expired - let (response, code) = server + let (mut response, code) = server .multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]})) .await; + response["message"] = serde_json::json!(null); assert_ne!(response, invalid_response(None)); assert_ne!(code, 403); // wait until the key is expired. 
thread::sleep(time::Duration::new(1, 0)); - let (response, code) = server + let (mut response, code) = server .multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]})) .await; + response["message"] = serde_json::json!(null); assert_eq!(response, invalid_response(None)); assert_eq!(code, 403); } @@ -1134,8 +1144,9 @@ async fn error_access_modified_token() { .join("."); server.use_api_key(&altered_token); - let (response, code) = + let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await; + response["message"] = serde_json::json!(null); assert_eq!(response, invalid_response(None)); assert_eq!(code, 403); } diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 114ede9b8..c8afa5e3e 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -365,7 +365,7 @@ impl Index<'_> { } pub async fn search_get(&self, query: &str) -> (Value, StatusCode) { - let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), query); + let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query); self.service.get(url).await } @@ -402,7 +402,7 @@ impl Index<'_> { } pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) { - let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query); + let url = format!("/indexes/{}/similar{}", urlencode(self.uid.as_ref()), query); self.service.get(url).await } diff --git a/meilisearch/tests/common/mod.rs b/meilisearch/tests/common/mod.rs index 4476e0d1f..1317dbce7 100644 --- a/meilisearch/tests/common/mod.rs +++ b/meilisearch/tests/common/mod.rs @@ -42,6 +42,12 @@ impl std::ops::Deref for Value { } } +impl std::ops::DerefMut for Value { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + impl PartialEq for Value { fn eq(&self, other: &serde_json::Value) -> bool { &self.0 == other diff --git a/meilisearch/tests/documents/add_documents.rs 
b/meilisearch/tests/documents/add_documents.rs index b1262fa2d..5e32564c7 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -183,6 +183,58 @@ async fn add_single_document_gzip_encoded() { } "###); } +#[actix_rt::test] +async fn add_single_document_gzip_encoded_with_incomplete_error() { + let document = json!("kefir"); + + // this is a what is expected and should work + let server = Server::new().await; + let app = server.init_web_app().await; + // post + let document = serde_json::to_string(&document).unwrap(); + let req = test::TestRequest::post() + .uri("/indexes/dog/documents") + .set_payload(document.to_string()) + .insert_header(("content-type", "application/json")) + .insert_header(("content-encoding", "gzip")) + .to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(status_code, @"400 Bad Request"); + snapshot!(json_string!(response), + @r###" + { + "message": "The provided payload is incomplete and cannot be parsed", + "code": "bad_request", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#bad_request" + } + "###); + + // put + let req = test::TestRequest::put() + .uri("/indexes/dog/documents") + .set_payload(document.to_string()) + .insert_header(("content-type", "application/json")) + .insert_header(("content-encoding", "gzip")) + .to_request(); + let res = test::call_service(&app, req).await; + let status_code = res.status(); + let body = test::read_body(res).await; + let response: Value = serde_json::from_slice(&body).unwrap_or_default(); + snapshot!(status_code, @"400 Bad Request"); + snapshot!(json_string!(response), + @r###" + { + "message": "The provided payload is incomplete and cannot be parsed", + "code": "bad_request", + "type": "invalid_request", + "link": 
"https://docs.meilisearch.com/errors#bad_request" + } + "###); +} /// Here we try document request with every encoding #[actix_rt::test] @@ -1040,6 +1092,52 @@ async fn document_addition_with_primary_key() { "###); } +#[actix_rt::test] +async fn document_addition_with_huge_int_primary_key() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = json!([ + { + "primary": 14630868576586246730u64, + "content": "foo", + } + ]); + let (response, code) = index.add_documents(documents, Some("primary")).await; + snapshot!(code, @"202 Accepted"); + + let response = index.wait_task(response.uid()).await; + snapshot!(response, + @r###" + { + "uid": 0, + "indexUid": "test", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index.get_document(14630868576586246730u64, None).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response), + @r###" + { + "primary": 14630868576586246730, + "content": "foo" + } + "###); +} + #[actix_rt::test] async fn replace_document() { let server = Server::new().await; diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index dc7bf70a7..a95797227 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -71,7 +71,7 @@ async fn search_bad_offset() { } "###); - let (response, code) = index.search_get("offset=doggo").await; + let (response, code) = index.search_get("?offset=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -99,7 +99,7 @@ async fn search_bad_limit() { } "###); - let (response, code) = index.search_get("limit=doggo").await; + let (response, code) = index.search_get("?limit=doggo").await; snapshot!(code, @"400 Bad 
Request"); snapshot!(json_string!(response), @r###" { @@ -127,7 +127,7 @@ async fn search_bad_page() { } "###); - let (response, code) = index.search_get("page=doggo").await; + let (response, code) = index.search_get("?page=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -155,7 +155,7 @@ async fn search_bad_hits_per_page() { } "###); - let (response, code) = index.search_get("hitsPerPage=doggo").await; + let (response, code) = index.search_get("?hitsPerPage=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -269,7 +269,7 @@ async fn search_bad_crop_length() { } "###); - let (response, code) = index.search_get("cropLength=doggo").await; + let (response, code) = index.search_get("?cropLength=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -359,7 +359,7 @@ async fn search_bad_show_matches_position() { } "###); - let (response, code) = index.search_get("showMatchesPosition=doggo").await; + let (response, code) = index.search_get("?showMatchesPosition=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -442,7 +442,7 @@ async fn search_non_filterable_facets() { } "###); - let (response, code) = index.search_get("facets=doggo").await; + let (response, code) = index.search_get("?facets=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -472,7 +472,7 @@ async fn search_non_filterable_facets_multiple_filterable() { } "###); - let (response, code) = index.search_get("facets=doggo").await; + let (response, code) = index.search_get("?facets=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -502,7 +502,7 @@ async fn search_non_filterable_facets_no_filterable() { } "###); - let (response, code) = index.search_get("facets=doggo").await; + let (response, code) = 
index.search_get("?facets=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -532,7 +532,7 @@ async fn search_non_filterable_facets_multiple_facets() { } "###); - let (response, code) = index.search_get("facets=doggo,neko").await; + let (response, code) = index.search_get("?facets=doggo,neko").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -625,7 +625,7 @@ async fn search_bad_matching_strategy() { } "###); - let (response, code) = index.search_get("matchingStrategy=doggo").await; + let (response, code) = index.search_get("?matchingStrategy=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { diff --git a/meilisearch/tests/search/geo.rs b/meilisearch/tests/search/geo.rs index 8754453ba..7804f1ad0 100644 --- a/meilisearch/tests/search/geo.rs +++ b/meilisearch/tests/search/geo.rs @@ -150,7 +150,8 @@ async fn bug_4640() { "_geo": { "lat": "45.4777599", "lng": "9.1967508" - } + }, + "_geoDistance": 0 }, { "id": 1, diff --git a/meilisearch/tests/similar/errors.rs b/meilisearch/tests/similar/errors.rs index 546554882..8b2bb57a4 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/meilisearch/tests/similar/errors.rs @@ -241,7 +241,7 @@ async fn similar_bad_offset() { } "###); - let (response, code) = index.similar_get("id=287947&offset=doggo").await; + let (response, code) = index.similar_get("?id=287947&offset=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -283,7 +283,7 @@ async fn similar_bad_limit() { } "###); - let (response, code) = index.similar_get("id=287946&limit=doggo").await; + let (response, code) = index.similar_get("?id=287946&limit=doggo").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { diff --git a/meilisearch/tests/tasks/mod.rs b/meilisearch/tests/tasks/mod.rs index ed387224e..f2ed76b6a 100644 --- a/meilisearch/tests/tasks/mod.rs +++ 
b/meilisearch/tests/tasks/mod.rs @@ -2,6 +2,7 @@ mod errors; mod webhook; use meili_snap::insta::assert_json_snapshot; +use meili_snap::snapshot; use time::format_description::well_known::Rfc3339; use time::OffsetDateTime; @@ -738,11 +739,9 @@ async fn test_summarized_index_creation() { async fn test_summarized_index_deletion() { let server = Server::new().await; let index = server.index("test"); - index.delete().await; - index.wait_task(0).await; - let (task, _) = index.get_task(0).await; - assert_json_snapshot!(task, - { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + let (ret, _code) = index.delete().await; + let task = index.wait_task(ret.uid()).await; + snapshot!(task, @r###" { "uid": 0, @@ -767,12 +766,34 @@ async fn test_summarized_index_deletion() { "###); // is the details correctly set when documents are actually deleted. - index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; - index.delete().await; - index.wait_task(2).await; - let (task, _) = index.get_task(2).await; - assert_json_snapshot!(task, - { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + // /!\ We need to wait for the document addition to be processed otherwise, if the test runs too slow, + // both tasks may get autobatched and the deleted documents count will be wrong. 
+ let (ret, _code) = + index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await; + let task = index.wait_task(ret.uid()).await; + snapshot!(task, + @r###" + { + "uid": 1, + "indexUid": "test", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (ret, _code) = index.delete().await; + let task = index.wait_task(ret.uid()).await; + snapshot!(task, @r###" { "uid": 2, @@ -792,22 +813,25 @@ async fn test_summarized_index_deletion() { "###); // What happens when you delete an index that doesn't exists. - index.delete().await; - index.wait_task(2).await; - let (task, _) = index.get_task(2).await; - assert_json_snapshot!(task, - { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }, + let (ret, _code) = index.delete().await; + let task = index.wait_task(ret.uid()).await; + snapshot!(task, @r###" { - "uid": 2, + "uid": 3, "indexUid": "test", - "status": "succeeded", + "status": "failed", "type": "indexDeletion", "canceledBy": null, "details": { - "deletedDocuments": 1 + "deletedDocuments": 0 + }, + "error": { + "message": "Index `test` not found.", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" }, - "error": null, "duration": "[duration]", "enqueuedAt": "[date]", "startedAt": "[date]", diff --git a/milli/src/documents/primary_key.rs b/milli/src/documents/primary_key.rs index 29f95beaf..22918f8fc 100644 --- a/milli/src/documents/primary_key.rs +++ b/milli/src/documents/primary_key.rs @@ -166,7 +166,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult Ok(s.to_string()), None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }), }, - 
Value::Number(number) if number.is_i64() => Ok(number.to_string()), + Value::Number(number) if !number.is_f64() => Ok(number.to_string()), content => Err(UserError::InvalidDocumentId { document_id: content }), } } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index bf488f9f0..8ae1ebb0f 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -371,4 +371,28 @@ mod test { assert_eq!(documents_ids, vec![1]); } + + #[cfg(feature = "korean")] + #[test] + fn test_hangul_language_detection() { + use crate::index::tests::TempIndex; + + let index = TempIndex::new(); + + index + .add_documents(documents!([ + { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" }, + { "id": 1, "title": "김밥먹을래。" }, + { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" } + ])) + .unwrap(); + + let txn = index.write_txn().unwrap(); + let mut search = Search::new(&txn, &index); + + search.query("김밥"); + let SearchResult { documents_ids, .. } = search.execute().unwrap(); + + assert_eq!(documents_ids, vec![1]); + } } diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 000000000..4739bf10a --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,3 @@ +[toolchain] +channel = "1.75.0" +components = ["clippy"]