diff --git a/Cargo.lock b/Cargo.lock index d6c674f68..5f906703a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -387,14 +387,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arroy" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a" +source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d" dependencies = [ "bytemuck", "byteorder", "heed", "log", "memmap2", + "nohash", "ordered-float", "rand", "rayon", @@ -471,7 +471,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "benchmarks" -version = "1.10.0" +version = "1.11.0" dependencies = [ "anyhow", "bytes", @@ -527,7 +527,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", "syn 2.0.60", ] @@ -652,7 +652,7 @@ dependencies = [ [[package]] name = "build-info" -version = "1.10.0" +version = "1.11.0" dependencies = [ "anyhow", "time", @@ -933,9 +933,9 @@ dependencies = [ [[package]] name = "charabia" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03cd8f290cae94934cdd0103c14c2de9faf2d7d85be0d24d511af2bf1b14119d" +checksum = "55ff52497324e7d168505a16949ae836c14595606fab94687238d2f6c8d4c798" dependencies = [ "aho-corasick", "csv", @@ -1622,7 +1622,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.10.0" +version = "1.11.0" dependencies = [ "anyhow", "big_s", @@ -1834,7 +1834,7 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "file-store" -version = "1.10.0" +version = "1.11.0" dependencies = [ "tempfile", "thiserror", @@ -1856,7 +1856,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.10.0" +version = "1.11.0" dependencies = [ "insta", "nom", @@ -1876,7 +1876,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.10.0" +version = "1.11.0" dependencies = [ "criterion", "serde_json", @@ -2000,7 +2000,7 @@ dependencies = [ [[package]] name = "fuzzers" -version = "1.10.0" +version = "1.11.0" dependencies = [ "arbitrary", "clap", @@ -2552,7 +2552,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d" [[package]] name = "index-scheduler" -version = "1.10.0" +version = "1.11.0" dependencies = [ "anyhow", "arroy", @@ -2746,7 +2746,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.10.0" +version = "1.11.0" dependencies = [ "criterion", "serde_json", @@ -3365,7 +3365,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.10.0" +version = "1.11.0" dependencies = [ "insta", "md5", @@ -3374,7 +3374,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.10.0" +version = "1.11.0" dependencies = [ "actix-cors", "actix-http", @@ -3463,7 +3463,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.10.0" +version = "1.11.0" dependencies = [ "base64 0.22.1", "enum-iterator", @@ -3482,7 +3482,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.10.0" +version = "1.11.0" dependencies = [ "actix-web", "anyhow", @@ -3513,7 +3513,7 @@ dependencies = [ [[package]] name = "meilitool" -version = "1.10.0" +version = "1.11.0" dependencies = [ "anyhow", "clap", @@ -3521,6 +3521,7 @@ dependencies = [ "file-store", "meilisearch-auth", "meilisearch-types", + "serde", "time", "uuid", ] @@ -3543,7 +3544,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.10.0" +version = "1.11.0" dependencies = [ "arroy", "big_s", @@ -3686,6 +3687,12 @@ version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" +[[package]] +name = "nohash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca" + [[package]] name = "nom" version = "7.1.3" @@ -3977,7 +3984,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "permissive-json-pointer" -version = "1.10.0" +version = "1.11.0" dependencies = [ "big_s", "serde_json", @@ -4308,7 +4315,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 1.1.0", "rustls", "thiserror", "tokio", @@ -4317,14 +4324,14 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.3" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe" +checksum = "fadfaed2cd7f389d0161bb73eeb07b7b78f8691047a6f3e73caaeae55310a4a6" dependencies = [ "bytes", "rand", "ring", - "rustc-hash", + "rustc-hash 2.0.0", "rustls", "slab", "thiserror", @@ -4697,6 +4704,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + [[package]] name = "rustc_version" version = "0.4.0" @@ -4835,9 +4848,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" dependencies = [ "serde_derive", ] @@ -4853,9 +4866,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.209" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" dependencies = [ "proc-macro2", "quote", @@ -5348,7 +5361,7 @@ dependencies = [ "fancy-regex 0.12.0", "lazy_static", "parking_lot", - "rustc-hash", + "rustc-hash 1.1.0", ] [[package]] @@ -6362,7 +6375,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.10.0" +version = "1.11.0" dependencies = [ "anyhow", "build-info", diff --git a/Cargo.toml b/Cargo.toml index 0fbfa9b12..5d9e1bd82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ members = [ ] [workspace.package] -version = "1.10.0" +version = "1.11.0" authors = [ "Quentin de Quelen ", "Clément Renault ", diff --git a/README.md b/README.md index e60d09b13..59d618ab2 100644 --- a/README.md +++ b/README.md @@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co ## ✨ Features - **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results - **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience -- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings +- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need -- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results +- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data - **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling -- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants +- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets - **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs - **Easy to install, deploy, and maintain** diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index 3b96cbfb0..4f66ed8b3 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -255,6 +255,8 @@ pub(crate) mod test { } "###); + insta::assert_json_snapshot!(vector_index.settings().unwrap()); + { let documents: Result> = vector_index.documents().unwrap().collect(); let mut documents = documents.unwrap(); diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap index 43bdb9726..77694a629 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap @@ -1,783 +1,56 @@ --- source: dump/src/reader/mod.rs -expression: document +expression: vector_index.settings().unwrap() --- { - "id": "e3", - "desc": "overriden vector + map", - "_vectors": { - "default": [ - 0.2, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 - ], - "toto": [ - 0.1 - ] - } + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "sort", + "exactness" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "proximityPrecision": "byWord", + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + }, + "embedders": { + "default": { + "source": "huggingFace", + "model": "BAAI/bge-base-en-v1.5", + "revision": "617ca489d9e86b49b8167676d8220688b99db36e", + "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}" + } + }, + "searchCutoffMs": null } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap index a9c76227a..43bdb9726 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap @@ -3,784 +3,781 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e2", - "desc": "natural vector + map", + "id": "e3", + "desc": "overriden vector + map", "_vectors": { - "toto": [], - "default": { - "embeddings": [ - [ - -0.05189208313822746, - -0.9273212552070618, - 0.1443813145160675, - 0.0932632014155388, - 0.2665371894836426, - 0.36266782879829407, - 0.6402910947799683, - 0.32014018297195435, - 0.030915971845388412, - -0.9312191605567932, - -0.3718109726905823, - -0.2700554132461548, - -1.1014580726623535, - 0.9154956936836244, - -0.3406888246536255, - 1.0077725648880005, - 0.6577560901641846, - -0.3955195546150207, - -0.4148270785808563, - 0.1855088472366333, - 0.5062315464019775, - -0.3632686734199524, - -0.2277890294790268, - 0.2560805082321167, - -0.3853609561920166, - -0.1604762226343155, - -0.13947471976280212, - -0.20147813856601715, - -0.4466346800327301, - -0.3761846721172333, - 0.1443382054567337, - 0.18205296993255615, - 0.49359792470932007, - -0.22538000345230105, - -0.4996317625045776, - -0.22734887897968292, - -0.6034309267997742, - -0.7857939600944519, - -0.34923747181892395, - -0.3466345965862274, - 0.21176661550998688, - -0.5101462006568909, - -0.3403083384037018, - 0.000315118464641273, - 0.236465722322464, - -0.10246097296476364, - -1.3013339042663574, - 0.3419138789176941, - -0.32963496446609497, - -0.0901619717478752, - -0.5426247119903564, - 0.22656650841236117, - -0.44758284091949463, - 0.14151698350906372, - -0.1089438870549202, - 0.5500766634941101, - -0.670711100101471, - -0.6227269768714905, - 0.3894464075565338, - -0.27609574794769287, - 0.7028202414512634, - -0.19697771966457367, - 0.328511506319046, - 0.5063360929489136, - 0.4065195322036743, - 0.2614171802997589, - -0.30274391174316406, - 1.0393824577331543, - -0.7742937207221985, - -0.7874112129211426, - -0.6749666929244995, - 0.5190866589546204, - 0.004123548045754433, - -0.28312963247299194, - -0.038731709122657776, - -1.0142987966537476, - -0.09519586712121964, - 0.8755272626876831, - 0.4876938760280609, - 0.7811151742935181, - 0.85174959897995, - 0.11826585978269576, - 0.5373436808586121, - 0.3649002015590668, - 0.19064077734947205, - -0.00287026260048151, - -0.7305403351783752, - -0.015206154435873032, - -0.7899249196052551, - 0.19407285749912265, - 0.08596625179052353, - -0.28976231813430786, - -0.1525907665491104, - 0.3798313438892365, - 0.050306469202041626, - -0.5697937607765198, - 0.4219021201133728, - 0.276252806186676, - 0.1559903472661972, - 0.10030482709407806, - -0.4043720066547394, - -0.1969818025827408, - 0.5739826560020447, - 0.2116064727306366, - -1.4620544910430908, - -0.7802462577819824, - -0.24739810824394223, - -0.09791352599859238, - -0.4413802027702331, - 0.21549351513385773, - -0.9520436525344848, - -0.08762510865926743, - 0.08154498040676117, - -0.6154940724372864, - -1.01079523563385, - 0.885427713394165, - 0.6967288851737976, - 0.27186504006385803, - -0.43194177746772766, - -0.11248451471328735, - 0.7576630711555481, - 0.4998855590820313, - 0.0264343973249197, - 0.9872855544090272, - 0.5634694695472717, - 0.053698331117630005, - 0.19410227239131927, - 0.3570743501186371, - -0.23670297861099243, - -0.9114483594894408, - 0.07884842902421951, - 0.7318344116210938, - 0.44630110263824463, - 0.08745364099740982, - -0.347101628780365, - -0.4314247667789459, - -0.5060274004936218, - 0.003706763498485088, - 0.44320008158683777, - -0.00788921769708395, - -0.1368623524904251, - -0.17391923069953918, - 0.14473655819892883, - 0.10927865654230118, - 0.6974599361419678, - 0.005052129738032818, - -0.016953065991401672, - -0.1256176233291626, - -0.036742497235536575, - 0.5591985583305359, - -0.37619709968566895, - 0.22429119050502777, - 0.5403043031692505, - -0.8603790998458862, - -0.3456307053565979, - 0.9292937517166138, - 0.5074859261512756, - 0.6310645937919617, - -0.3091641068458557, - 0.46902573108673096, - 0.7891915440559387, - 0.4499550759792328, - 0.2744995653629303, - 0.2712305784225464, - -0.04349074140191078, - -0.3638863265514374, - 0.7839881777763367, - 0.7352104783058167, - -0.19457511603832245, - -0.5957832932472229, - -0.43704694509506226, - -1.084769368171692, - 0.4904985725879669, - 0.5385226011276245, - 0.1891629993915558, - 0.12338479608297348, - 0.8315675258636475, - -0.07830192148685455, - 1.0916285514831543, - -0.28066861629486084, - -1.3585069179534912, - 0.5203898549079895, - 0.08678033947944641, - -0.2566044330596924, - 0.09484415501356123, - -0.0180208683013916, - 1.0264745950698853, - -0.023572135716676712, - 0.5864979028701782, - 0.7625196576118469, - -0.2543414533138275, - -0.8877770900726318, - 0.7611982822418213, - -0.06220436468720436, - 0.937336564064026, - 0.2704363465309143, - -0.37733694911003113, - 0.5076137781143188, - -0.30641937255859375, - 0.6252772808074951, - -0.0823579877614975, - -0.03736555948853493, - 0.4131673276424408, - -0.6514252424240112, - 0.12918265163898468, - -0.4483584463596344, - 0.6750786304473877, - -0.37008383870124817, - -0.02324833907186985, - 0.38027650117874146, - -0.26374951004981995, - 0.4346931278705597, - 0.42882832884788513, - -0.48798441886901855, - 1.1882442235946655, - 0.5132288336753845, - 0.5284568667411804, - -0.03538886830210686, - 0.29620853066444397, - -1.0683696269989014, - 0.25936177372932434, - 0.10404160618782043, - -0.25796034932136536, - 0.027896970510482788, - -0.09225251525640488, - 1.4811025857925415, - 0.641173779964447, - -0.13838383555412292, - -0.3437179923057556, - 0.5667019486427307, - -0.5400741696357727, - 0.31090837717056274, - 0.6470608115196228, - -0.3747067153453827, - -0.7364534735679626, - -0.07431528717279434, - 0.5173454880714417, - -0.6578747034072876, - 0.7107478976249695, - -0.7918999791145325, - -0.0648345872759819, - 0.609937846660614, - -0.7329513430595398, - 0.9741371870040894, - 0.17912346124649048, - -0.02658769302070141, - 0.5162150859832764, - -0.3978803157806397, - -0.7833885550498962, - -0.6497276425361633, - -0.3898126780986786, - -0.0952848568558693, - 0.2663288116455078, - -0.1604052186012268, - 0.373076468706131, - -0.8357769250869751, - -0.05217683315277099, - -0.2680160701274872, - 0.8389158248901367, - 0.6833611130714417, - -0.6712407469749451, - 0.7406917214393616, - -0.44522786140441895, - -0.34645363688468933, - -0.27384576201438904, - -0.9878405928611756, - -0.8166060447692871, - 0.06268279999494553, - 0.38567957282066345, - -0.3274703919887543, - 0.5296315550804138, - -0.11810623109340668, - 0.23029841482639313, - 0.08616159111261368, - -0.2195747196674347, - 0.09430307894945145, - 0.4057176411151886, - 0.4892159104347229, - -0.1636916548013687, - -0.6071445345878601, - 0.41256585717201233, - 0.622254490852356, - -0.41223976016044617, - -0.6686707139015198, - -0.7474371790885925, - -0.8509522080421448, - -0.16754287481307983, - -0.9078601002693176, - -0.29653599858283997, - -0.5020652413368225, - 0.4692700505256653, - 0.01281109917908907, - -0.16071580350399017, - 0.03388889133930206, - -0.020511148497462273, - 0.5027827024459839, - -0.20729811489582065, - 0.48107290267944336, - 0.33669769763946533, - -0.5275911688804626, - 0.48271527886390686, - 0.2738940715789795, - -0.033152539283037186, - -0.13629786670207977, - -0.05965912342071533, - -0.26200807094573975, - 0.04002794995903969, - -0.34095603227615356, - -3.986898899078369, - -0.46819332242012024, - -0.422744482755661, - -0.169097900390625, - 0.6008929014205933, - 0.058016058057546616, - -0.11401277780532836, - -0.3077819049358368, - -0.09595538675785063, - 0.6723822355270386, - 0.19367831945419312, - 0.28304359316825867, - 0.1609862744808197, - 0.7567598819732666, - 0.6889985799789429, - 0.06907720118761063, - -0.04188092052936554, - -0.7434936165809631, - 0.13321782648563385, - 0.8456063270568848, - -0.10364038497209548, - -0.45084846019744873, - -0.4758241474628449, - 0.43882066011428833, - -0.6432598829269409, - 0.7217311859130859, - -0.24189773201942444, - 0.12737572193145752, - -1.1008601188659668, - -0.3305315673351288, - 0.14614742994308472, - -0.7819333076477051, - 0.5287120342254639, - -0.055538054555654526, - 0.1877404749393463, - -0.6907662153244019, - 0.5616975426673889, - -0.4611121714115143, - -0.26109233498573303, - -0.12898315489292145, - -0.3724522292613983, - -0.7191406488418579, - -0.4425233602523804, - -0.644108235836029, - 0.8424481153488159, - 0.17532426118850708, - -0.5121750235557556, - -0.6467239260673523, - -0.0008507720194756985, - 0.7866212129592896, - -0.02644744887948036, - -0.005045140627771616, - 0.015782782807946205, - 0.16334445774555206, - -0.1913367658853531, - -0.13697923719882965, - -0.6684983372688293, - 0.18346354365348816, - -0.341105580329895, - 0.5427411198616028, - 0.3779832422733307, - -0.6778115034103394, - -0.2931850254535675, - -0.8805161714553833, - -0.4212774932384491, - -0.5368952751159668, - -1.3937891721725464, - -1.225494146347046, - 0.4276703894138336, - 1.1205668449401855, - -0.6005299687385559, - 0.15732505917549133, - -0.3914784789085388, - -1.357046604156494, - -0.4707142114639282, - -0.1497287154197693, - -0.25035548210144043, - -0.34328439831733704, - 0.39083412289619446, - 0.1623048633337021, - -0.9275814294815063, - -0.6430015563964844, - 0.2973862886428833, - 0.5580436587333679, - -0.6232585310935974, - -0.6611042022705078, - 0.4015969038009643, - -1.0232892036437988, - -0.2585645020008087, - -0.5431421399116516, - 0.5021264553070068, - -0.48601630330085754, - -0.010242084041237833, - 0.5862035155296326, - 0.7316920161247253, - 0.4036808013916016, - 0.4269520044326782, - -0.705938458442688, - 0.7747307419776917, - 0.10164368897676468, - 0.7887958884239197, - -0.9612497091293336, - 0.12755516171455383, - 0.06812842190265656, - -0.022603651508688927, - 0.14722754061222076, - -0.5588505268096924, - -0.20689940452575684, - 0.3557641804218292, - -0.6812759637832642, - 0.2860803008079529, - -0.38954633474349976, - 0.1759403496980667, - -0.5678874850273132, - -0.1692986786365509, - -0.14578519761562347, - 0.5711379051208496, - 1.0208125114440918, - 0.7759483456611633, - -0.372348427772522, - -0.5460885763168335, - 0.7190321683883667, - -0.6914990544319153, - 0.13365162909030914, - -0.4854792356491089, - 0.4054908752441406, - 0.4502798914909363, - -0.3041122555732727, - -0.06726965308189392, - -0.05570871382951737, - -0.0455719493329525, - 0.4785125255584717, - 0.8867972493171692, - 0.4107886850833893, - 0.6121342182159424, - -0.20477132499217987, - -0.5598517656326294, - -0.6443566679954529, - -0.5905212759971619, - -0.5571200251579285, - 0.17573799192905426, - -0.28621870279312134, - 0.1685224026441574, - 0.09719007462263109, - -0.04223639518022537, - -0.28623101115226746, - -0.1449810117483139, - -0.3789580464363098, - -0.5227636098861694, - -0.049728814512491226, - 0.7849089503288269, - 0.16792525351047516, - 0.9849340915679932, - -0.6559549570083618, - 0.35723909735679626, - -0.6822739243507385, - 1.2873116731643677, - 0.19993330538272855, - 0.03512010723352432, - -0.6972134113311768, - 0.18453484773635864, - -0.2437680810689926, - 0.2156416028738022, - 0.5230382680892944, - 0.22020135819911957, - 0.8314080238342285, - 0.15627102553844452, - -0.7330264449119568, - 0.3888184726238251, - -0.22034703195095065, - 0.5457669496536255, - -0.48084837198257446, - -0.45576658844947815, - -0.09287727624177931, - -0.06968110054731369, - 0.35125672817230225, - -0.4278119504451752, - 0.2038476765155792, - 0.11392722278833388, - 0.9433983564376832, - -0.4097744226455689, - 0.035297419875860214, - -0.4274404048919678, - -0.25100165605545044, - 1.0943366289138794, - -0.07634022831916809, - -0.2925529479980469, - -0.7512530088424683, - 0.2649727463722229, - -0.4078235328197479, - -0.3372223973274231, - 0.05190162733197212, - 0.005654910113662481, - -0.0001571219472680241, - -0.35445958375930786, - -0.7837416529655457, - 0.1500556766986847, - 0.4383024573326111, - 0.6099548935890198, - 0.05951934307813645, - -0.21325334906578064, - 0.0199207104742527, - -0.22704418003559113, - -0.6481077671051025, - 0.37442275881767273, - -1.015955924987793, - 0.38637226819992065, - -0.06489371508359909, - -0.494120329618454, - 0.3469836115837097, - 0.15402406454086304, - -0.7660972476005554, - -0.7053225040435791, - -0.25964751839637756, - 0.014004424214363098, - -0.2860170006752014, - -0.17565494775772095, - -0.45117494463920593, - -0.0031954257283359766, - 0.09676837921142578, - -0.514464259147644, - 0.41698193550109863, - -0.21642713248729703, - -0.5398141145706177, - -0.3647628426551819, - 0.37005379796028137, - 0.239425927400589, - -0.08833975344896317, - 0.934946596622467, - -0.48340797424316406, - 0.6241437792778015, - -0.7253676652908325, - -0.04303571209311485, - 1.1125205755233765, - -0.15692919492721558, - -0.2914651036262512, - -0.5117168426513672, - 0.21365483105182648, - 0.4924402534961701, - 0.5269662141799927, - 0.0352792888879776, - -0.149167999625206, - -0.6019760370254517, - 0.08245442807674408, - 0.4900692105293274, - 0.518824577331543, - -0.00005570516441366635, - -0.553304135799408, - 0.22217543423175812, - 0.5047767758369446, - 0.135724738240242, - 1.1511540412902832, - -0.3541218340396881, - -0.9712511897087096, - 0.8353699445724487, - -0.39227569103240967, - -0.9117669463157654, - -0.26349931955337524, - 0.05597023293375969, - 0.20695461332798004, - 0.3178807199001312, - 1.0663238763809204, - 0.5062212347984314, - 0.7288597822189331, - 0.09899299591779707, - 0.553720235824585, - 0.675009548664093, - -0.20067055523395536, - 0.3138423264026642, - -0.6886593103408813, - -0.2910398542881012, - -1.3186300992965698, - -0.4684459865093231, - -0.095743365585804, - -0.1257995069026947, - -0.4858281314373016, - -0.4935407340526581, - -0.3266896903514862, - -0.3928797245025635, - -0.40803104639053345, - -0.9975396394729614, - 0.4229583740234375, - 0.37309643626213074, - 0.4431034922599793, - 0.30364808440208435, - -0.3765178918838501, - 0.5616499185562134, - 0.16904796659946442, - -0.7343707084655762, - 0.2560209631919861, - 0.6166825294494629, - 0.3200829327106476, - -0.4483652710914612, - 0.16224201023578644, - -0.31495288014411926, - -0.42713335156440735, - 0.7270734906196594, - 0.7049484848976135, - -0.0571461021900177, - 0.04477125033736229, - -0.6647796034812927, - 1.183672308921814, - 0.36199676990509033, - 0.046881116926670074, - 0.4515796303749085, - 0.9278061985969543, - 0.31471705436706543, - -0.7073333859443665, - -0.3443860113620758, - 0.5440067052841187, - -0.15020819008350372, - -0.541202962398529, - 0.5203295946121216, - 1.2192286252975464, - -0.9983593225479126, - -0.18758884072303772, - 0.2758221924304962, - -0.6511523723602295, - -0.1584404855966568, - -0.236241415143013, - 0.2692437767982483, - -0.4941152036190033, - 0.4987454116344452, - -0.3331359028816223, - 0.3163745701313019, - 0.745529294013977, - -0.2905873656272888, - 0.13602906465530396, - 0.4679684340953827, - 1.0555986166000366, - 1.075700044631958, - 0.5368486046791077, - -0.5118206739425659, - 0.8668332099914551, - -0.5726966857910156, - -0.7811751961708069, - 0.1938626915216446, - -0.1929349899291992, - 0.1757766306400299, - 0.6384295225143433, - 0.26462844014167786, - 0.9542630314826964, - 0.19313029944896695, - 1.264248013496399, - -0.6304428577423096, - 0.0487106591463089, - -0.16211535036563873, - -0.7894763350486755, - 0.3582514822483063, - -0.04153040423989296, - 0.635784387588501, - 0.6554391980171204, - -0.47010496258735657, - -0.8302040696144104, - -0.1350124627351761, - 0.2568812072277069, - 0.13614831864833832, - -0.2563649117946625, - -1.0434694290161133, - 0.3232482671737671, - 0.47882452607154846, - 0.4298652410507202, - 1.0563770532608032, - -0.28917592763900757, - -0.8533256649971008, - 0.10648339986801147, - 0.6376127004623413, - -0.20832888782024384, - 0.2370245456695557, - 0.0018312990432605147, - -0.2034837007522583, - 0.01051164511591196, - -1.105310082435608, - 0.29724350571632385, - 0.15604574978351593, - 0.1973688006401062, - 0.44394731521606445, - 0.3974513411521912, - -0.13625948131084442, - 0.9571986198425292, - 0.2257384955883026, - 0.2323588728904724, - -0.5583669543266296, - -0.7854922413825989, - 0.1647188365459442, - -1.6098142862319946, - 0.318587988615036, - -0.13399995863437653, - -0.2172701060771942, - -0.767514705657959, - -0.5813586711883545, - -0.3195130527019501, - -0.04894036799669266, - 0.2929930090904236, - -0.8213384747505188, - 0.07181350141763687, - 0.7469993829727173, - 0.6407455801963806, - 0.16365697979927063, - 0.7870153188705444, - 0.6524736881256104, - 0.6399973630905151, - -0.04992736503481865, - -0.03959266096353531, - -0.2512352466583252, - 0.8448855876922607, - -0.1422702670097351, - 0.1216789186000824, - -1.2647287845611572, - 0.5931149125099182, - 0.7186052203178406, - -0.06118432432413101, - -1.1942816972732544, - -0.17677085101604462, - 0.31543800234794617, - -0.32252824306488037, - 0.8255583047866821, - -0.14529970288276672, - -0.2695446312427521, - -0.33378756046295166, - -0.1653425395488739, - 0.1454019844532013, - -0.3920115828514099, - 0.912214994430542, - -0.7279734015464783, - 0.7374742031097412, - 0.933980405330658, - 0.13429680466651917, - -0.514870285987854, - 0.3989711999893189, - -0.11613689363002776, - 0.4022413492202759, - -0.9990655779838562, - -0.33749932050704956, - -0.4334589838981629, - -1.376373291015625, - -0.2993924915790558, - -0.09454808384180068, - -0.01314175222069025, - -0.001090060803107917, - 0.2137461006641388, - 0.2938512861728668, - 0.17508235573768616, - 0.8260607123374939, - -0.7218498587608337, - 0.2414487451314926, - -0.47296759486198425, - -0.3002610504627228, - -1.238540768623352, - 0.08663805574178696, - 0.6805586218833923, - 0.5909030437469482, - -0.42807504534721375, - -0.22887496650218964, - 0.47537800669670105, - -1.0474627017974854, - 0.6338009238243103, - 0.06548397243022919, - 0.4971011281013489, - 1.3484878540039063 - ] - ], - "regenerate": true - } + "default": [ + 0.2, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ], + "toto": [ + 0.1 + ] } } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap index e5d28e450..a9c76227a 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap @@ -3,780 +3,781 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e1", - "desc": "natural vector", + "id": "e2", + "desc": "natural vector + map", "_vectors": { + "toto": [], "default": { "embeddings": [ [ - -0.2979458272457123, - -0.5288640856742859, - -0.019957859069108963, - -0.18495318293571472, - 0.7429973483085632, - 0.5238497257232666, - 0.432366281747818, - 0.32744166254997253, - 0.0020762972999364138, - -0.9507834911346436, - -0.35097137093544006, - 0.08469701558351517, - -1.4176613092422483, - 0.4647577106952667, - -0.69340580701828, - 1.0372896194458008, - 0.3716741800308227, - 0.06031008064746857, - -0.6152024269104004, - 0.007914665155112743, - 0.7954924702644348, - -0.20773003995418549, - 0.09376765787601472, - 0.04508133605122566, - -0.2084471583366394, - -0.1518009901046753, - 0.018195509910583496, - -0.07044368237257004, - -0.18119366466999057, - -0.4480230510234833, - 0.3822529911994934, - 0.1911812424659729, - 0.4674372375011444, - 0.06963984668254852, - -0.09341949224472046, - 0.005675444379448891, - -0.6774799227714539, - -0.7066726684570313, - -0.39256376028060913, - 0.04005039855837822, - 0.2084812968969345, - -0.7872875928878784, - -0.8205880522727966, - 0.2919981777667999, - -0.06004738807678223, - -0.4907574355602264, - -1.5937862396240234, - 0.24249385297298431, - -0.14709846675395966, - -0.11860740929841997, - -0.8299489617347717, - 0.472964346408844, - -0.497518390417099, - -0.22205302119255063, - -0.4196169078350067, - 0.32697558403015137, - -0.360930860042572, - -0.9789686799049376, - 0.1887447088956833, - -0.403737336397171, - 0.18524253368377688, - 0.3768732249736786, - 0.3666233420372009, - 0.3511938452720642, - 0.6985810995101929, - 0.41721710562705994, - 0.09754953533411026, - 0.6204307079315186, - -1.0762996673583984, - -0.06263761967420578, - -0.7376511693000793, - 0.6849768161773682, - -0.1745152473449707, - -0.40449759364128113, - 0.20757411420345304, - -0.8424443006515503, - 0.330015629529953, - 0.3489064872264862, - 1.0954371690750122, - 0.8487558960914612, - 1.1076823472976685, - 0.61430823802948, - 0.4155903458595276, - 0.4111340939998626, - 0.05753209814429283, - -0.06429877132177353, - -0.765606164932251, - -0.41703930497169495, - -0.508820652961731, - 0.19859947264194489, - -0.16607828438282013, - -0.28112146258354187, - 0.11032675206661224, - 0.38809511065483093, - -0.36498191952705383, - -0.48671194911003113, - 0.6755134463310242, - 0.03958442434668541, - 0.4478721618652344, - -0.10335399955511092, - -0.9546685814857484, - -0.6087718605995178, - 0.17498846352100372, - 0.08320838958024979, - -1.4478336572647097, - -0.605027437210083, - -0.5867993235588074, - -0.14711688458919525, - -0.5447602272033691, - -0.026259321719408035, - -0.6997418403625488, - -0.07349082082509995, - 0.10638900846242905, - -0.7133527398109436, - -0.9396815299987792, - 1.087092399597168, - 1.1885089874267578, - 0.4011896848678589, - -0.4089202582836151, - -0.10938972979784012, - 0.6726722121238708, - 0.24576938152313232, - -0.24247920513153076, - 1.1499971151351929, - 0.47813335061073303, - -0.05331678315997124, - 0.32338133454322815, - 0.4870913326740265, - -0.23144258558750153, - -1.2023426294326782, - 0.2349330335855484, - 1.080536961555481, - 0.29334118962287903, - 0.391574501991272, - -0.15818795561790466, - -0.2948290705680847, - -0.024689948186278343, - 0.06602869182825089, - 0.5937030911445618, - -0.047901444137096405, - -0.512734591960907, - -0.35780075192451477, - 0.28751692175865173, - 0.4298716187477112, - 0.9242428541183472, - -0.17208744585514069, - 0.11515070497989656, - -0.0335976779460907, - -0.3422986567020416, - 0.5344581604003906, - 0.19895796477794647, - 0.33001241087913513, - 0.6390730142593384, - -0.6074934005737305, - -0.2553696632385254, - 0.9644920229911804, - 0.2699219584465027, - 0.6403993368148804, - -0.6380003690719604, - -0.027310986071825027, - 0.638815701007843, - 0.27719101309776306, - -0.13553589582443237, - 0.750195324420929, - 0.1224869191646576, - -0.20613941550254825, - 0.8444448709487915, - 0.16200250387191772, - -0.24750925600528717, - -0.739950954914093, - -0.28443849086761475, - -1.176282525062561, - 0.516107976436615, - 0.3774825632572174, - 0.10906043648719788, - 0.07962015271186829, - 0.7384604215621948, - -0.051241904497146606, - 1.1730090379714966, - -0.4828610122203827, - -1.404372215270996, - 0.8811132311820984, - -0.3839482367038727, - 0.022516896948218346, - -0.0491158664226532, - -0.43027013540267944, - 1.2049334049224854, - -0.27309560775756836, - 0.6883630752563477, - 0.8264574408531189, - -0.5020735263824463, - -0.4874092042446137, - 0.6007202863693237, - -0.4965405762195587, - 1.1302915811538696, - 0.032572727650403976, - -0.3731859028339386, - 0.658271849155426, - -0.9023059010505676, - 0.7400162220001221, - 0.014550759457051754, - -0.19699542224407196, - 0.2319706380367279, - -0.789058268070221, - -0.14905710518360138, - -0.5826214551925659, - 0.207652747631073, - -0.4507439732551574, - -0.3163885474205017, - 0.3604124188423157, - -0.45119962096214294, - 0.3428427278995514, - 0.3005594313144684, - -0.36026081442832947, - 1.1014249324798584, - 0.40884315967559814, - 0.34991952776908875, - -0.1806638240814209, - 0.27440476417541504, - -0.7118373513221741, - 0.4645499587059021, - 0.214790478348732, - -0.2343102991580963, - 0.10500429570674896, - -0.28034430742263794, - 1.2267805337905884, - 1.0561333894729614, - -0.497364342212677, - -0.6143305897712708, - 0.24963727593421936, - -0.33136463165283203, - -0.01473914459347725, - 0.495918869972229, - -0.6985538005828857, - -1.0033197402954102, - 0.35937801003456116, - 0.6325868368148804, - -0.6808838844299316, - 1.0354058742523191, - -0.7214401960372925, - -0.33318862318992615, - 0.874398410320282, - -0.6594992280006409, - 0.6830640435218811, - -0.18534131348133087, - 0.024834271520376205, - 0.19901277124881744, - -0.5992477536201477, - -1.2126628160476685, - -0.9245557188987732, - -0.3898217976093292, - -0.1286519467830658, - 0.4217943847179413, - -0.1143646091222763, - 0.5630772709846497, - -0.5240639448165894, - 0.21152715384960177, - -0.3792001008987427, - 0.8266305327415466, - 1.170984387397766, - -0.8072142004966736, - 0.11382893472909927, - -0.17953898012638092, - -0.1789460331201553, - -0.15078622102737427, - -1.2082908153533936, - -0.7812382578849792, - -0.10903695970773696, - 0.7303897142410278, - -0.39054441452026367, - 0.19511254131793976, - -0.09121843427419662, - 0.22400228679180145, - 0.30143046379089355, - 0.1141919493675232, - 0.48112115263938904, - 0.7307931780815125, - 0.09701362252235413, - -0.2795647978782654, - -0.3997688889503479, - 0.5540812611579895, - 0.564578115940094, - -0.40065160393714905, - -0.3629159033298493, - -0.3789091110229492, - -0.7298538088798523, - -0.6996853351593018, - -0.4477842152118683, - -0.289089560508728, - -0.6430277824401855, - 0.2344944179058075, - 0.3742927014827728, - -0.5079357028007507, - 0.28841453790664673, - 0.06515737622976303, - 0.707315981388092, - 0.09498685598373412, - 0.8365515470504761, - 0.10002726316452026, - -0.7695478200912476, - 0.6264724135398865, - 0.7562043070793152, - -0.23112858831882477, - -0.2871039807796478, - -0.25010058283805847, - 0.2783474028110504, - -0.03224996477365494, - -0.9119359850883484, - -3.6940200328826904, - -0.5099936127662659, - -0.1604711413383484, - 0.17453284561634064, - 0.41759559512138367, - 0.1419190913438797, - -0.11362407356500626, - -0.33312007784843445, - 0.11511333286762238, - 0.4667884409427643, - -0.0031647447030991316, - 0.15879854559898376, - 0.3042248487472534, - 0.5404849052429199, - 0.8515422344207764, - 0.06286454200744629, - 0.43790125846862793, - -0.8682025074958801, - -0.06363756954669952, - 0.5547921657562256, - -0.01483887154608965, - -0.07361344993114471, - -0.929947018623352, - 0.3502565622329712, - -0.5080993175506592, - 1.0380364656448364, - -0.2017953395843506, - 0.21319580078125, - -1.0763001441955566, - -0.556368887424469, - 0.1949922740459442, - -0.6445739269256592, - 0.6791343688964844, - 0.21188358962535855, - 0.3736183941364288, - -0.21800459921360016, - 0.7597446441650391, - -0.3732394874095917, - -0.4710160195827484, - 0.025146087631583217, - 0.05341297015547752, - -0.9522109627723694, - -0.6000866889953613, - -0.08469046652317047, - 0.5966026186943054, - 0.3444081246852875, - -0.461188405752182, - -0.5279349088668823, - 0.10296865552663804, - 0.5175143480300903, - -0.20671147108078003, - 0.13392412662506104, - 0.4812754988670349, - 0.2993808686733246, - -0.3005635440349579, - 0.5141698122024536, - -0.6239235401153564, - 0.2877119481563568, - -0.4452739953994751, - 0.5621107816696167, - 0.5047508478164673, - -0.4226335883140564, - -0.18578553199768064, - -1.1967322826385498, - 0.28178197145462036, - -0.8692031502723694, - -1.1812998056411743, - -1.4526212215423584, - 0.4645712077617645, - 0.9327932000160216, - -0.6560136675834656, - 0.461549699306488, - -0.5621527433395386, - -1.328449010848999, - -0.08676894754171371, - 0.00021918353741057217, - -0.18864136934280396, - 0.1259666532278061, - 0.18240638077259064, - -0.14919660985469818, - -0.8965857625007629, - -0.7539900541305542, - 0.013973715715110302, - 0.504276692867279, - -0.704748272895813, - -0.6428424119949341, - 0.6303996443748474, - -0.5404738187789917, - -0.31176653504371643, - -0.21262824535369873, - 0.18736739456653595, - -0.7998970746994019, - 0.039946746081113815, - 0.7390344738960266, - 0.4283199906349182, - 0.3795057237148285, - 0.07204607129096985, - -0.9230587482452391, - 0.9440426230430604, - 0.26272690296173096, - 0.5598306655883789, - -1.0520871877670288, - -0.2677186131477356, - -0.1888762265443802, - 0.30426350235939026, - 0.4746131896972656, - -0.5746733546257019, - -0.4197768568992615, - 0.8565112948417664, - -0.6767723560333252, - 0.23448683321475983, - -0.2010004222393036, - 0.4112907350063324, - -0.6497949957847595, - -0.418667733669281, - -0.4950824975967407, - 0.44438859820365906, - 1.026281714439392, - 0.482397586107254, - -0.26220494508743286, - -0.3640787005424499, - 0.5907743573188782, - -0.8771642446517944, - 0.09708411991596222, - -0.3671700060367584, - 0.4331349730491638, - 0.619417667388916, - -0.2684665620326996, - -0.5123821496963501, - -0.1502324342727661, - -0.012190685607492924, - 0.3580845892429352, - 0.8617186546325684, - 0.3493645489215851, - 1.0270192623138428, - 0.18297909200191495, - -0.5881339311599731, - -0.1733516901731491, - -0.5040576457977295, - -0.340370237827301, - -0.26767754554748535, - -0.28570041060447693, - -0.032928116619586945, - 0.6029254794120789, - 0.17397655546665192, - 0.09346921741962431, - 0.27815181016921997, - -0.46699589490890503, - -0.8148876428604126, - -0.3964351713657379, - 0.3812595009803772, - 0.13547226786613464, - 0.7126688361167908, - -0.3473474085330963, - -0.06573959439992905, - -0.6483767032623291, - 1.4808889627456665, - 0.30924928188323975, - -0.5085946917533875, - -0.8613000512123108, - 0.3048902451992035, - -0.4241599142551422, - 0.15909206867218018, - 0.5764641761779785, - -0.07879110425710678, - 1.015336513519287, - 0.07599356025457382, - -0.7025855779647827, - 0.30047643184661865, - -0.35094937682151794, - 0.2522146999835968, - -0.2338722199201584, - -0.8326804637908936, - -0.13695412874221802, - -0.03452421352267265, - 0.47974953055381775, - -0.18385636806488037, - 0.32438594102859497, - 0.1797013282775879, - 0.787494957447052, - -0.12579888105392456, - -0.07507286965847015, - -0.4389670491218567, - 0.2720070779323578, - 0.8138866424560547, - 0.01974171027541161, - -0.3057698905467987, - -0.6709924936294556, - 0.0885881632566452, - -0.2862754464149475, - 0.03475658595561981, - -0.1285519152879715, - 0.3838353455066681, - -0.2944154739379883, - -0.4204859137535095, - -0.4416137933731079, - 0.13426260650157928, - 0.36733248829841614, - 0.573428750038147, - -0.14928072690963745, - -0.026076916605234143, - 0.33286052942276, - -0.5340145826339722, - -0.17279052734375, - -0.01154550164937973, - -0.6620771884918213, - 0.18390542268753052, - -0.08265615254640579, - -0.2489682286977768, - 0.2429984211921692, - -0.044153645634651184, - -0.986578404903412, - -0.33574509620666504, - -0.5387663841247559, - 0.19767941534519196, - 0.12540718913078308, - -0.3403128981590271, - -0.4154576361179352, - 0.17275673151016235, - 0.09407442808151244, - -0.5414086580276489, - 0.4393929839134216, - 0.1725579798221588, - -0.4998118281364441, - -0.6926208138465881, - 0.16552448272705078, - 0.6659538149833679, - -0.10949844866991044, - 0.986426830291748, - 0.01748848147690296, - 0.4003709554672241, - -0.5430638194084167, - 0.35347291827201843, - 0.6887399554252625, - 0.08274628221988678, - 0.13407137989997864, - -0.591465950012207, - 0.3446292281150818, - 0.6069018244743347, - 0.1935492902994156, - -0.0989871397614479, - 0.07008486241102219, - -0.8503749370574951, - -0.09507356584072112, - 0.6259510517120361, - 0.13934025168418884, - 0.06392545253038406, - -0.4112265408039093, - -0.08475656062364578, - 0.4974113404750824, - -0.30606114864349365, - 1.111435890197754, - -0.018766529858112335, - -0.8422622680664063, - 0.4325508773326874, - -0.2832120656967163, - -0.4859798848628998, - -0.41498348116874695, - 0.015977520495653152, - 0.5292825698852539, - 0.4538311660289765, - 1.1328668594360352, - 0.22632671892642975, - 0.7918671369552612, - 0.33401933312416077, - 0.7306135296821594, - 0.3548600673675537, - 0.12506209313869476, - 0.8573207855224609, - -0.5818327069282532, - -0.6953738927841187, - -1.6171947717666626, - -0.1699674427509308, - 0.6318262815475464, - -0.05671752244234085, - -0.28145185112953186, - -0.3976689279079437, - -0.2041076272726059, - -0.5495951175689697, - -0.5152917504310608, - -0.9309796094894408, - 0.101932130753994, - 0.1367802917957306, - 0.1490798443555832, - 0.5304336547851563, - -0.5082434415817261, - 0.06688683480024338, - 0.14657628536224365, - -0.782435953617096, - 0.2962816655635834, - 0.6965363621711731, - 0.8496337532997131, - -0.3042965829372406, - 0.04343798756599426, - 0.0330701619386673, - -0.5662598013877869, - 1.1086925268173218, - 0.756072998046875, - -0.204134538769722, - 0.2404300570487976, - -0.47848284244537354, - 1.3659011125564575, - 0.5645433068275452, - -0.15836156904697418, - 0.43395575881004333, - 0.5944653749465942, - 1.0043466091156006, - -0.49446743726730347, - -0.5954391360282898, - 0.5341240763664246, - 0.020598189905285835, - -0.4036853015422821, - 0.4473709762096405, - 1.1998231410980225, - -0.9317775368690492, - -0.23321466147899628, - 0.2052552700042725, - -0.7423108816146851, - -0.19917210936546328, - -0.1722569614648819, - -0.034072667360305786, - -0.00671181408688426, - 0.46396249532699585, - -0.1372445821762085, - 0.053376372903585434, - 0.7392690777778625, - -0.38447609543800354, - 0.07497968524694443, - 0.5197252631187439, - 1.3746477365493774, - 0.9060075879096984, - 0.20000585913658145, - -0.4053704142570496, - 0.7497360110282898, - -0.34087055921554565, - -1.101803183555603, - 0.273650586605072, - -0.5125769376754761, - 0.22472351789474487, - 0.480757474899292, - -0.19845178723335263, - 0.8857700824737549, - 0.30752456188201904, - 1.1109285354614258, - -0.6768012642860413, - 0.524367094039917, - -0.22495046257972717, - -0.4224412739276886, - 0.40753406286239624, - -0.23133376240730288, - 0.3297771215438843, - 0.4905449151992798, - -0.6813114285469055, - -0.7543983459472656, - -0.5599071383476257, - 0.14351597428321838, - -0.029278717935085297, - -0.3970443606376648, - -0.303079217672348, - 0.24161772429943085, - 0.008353390730917454, - -0.0062365154735744, - 1.0824860334396362, - -0.3704061508178711, - -1.0337258577346802, - 0.04638749733567238, - 1.163011074066162, - -0.31737643480300903, - 0.013986887410283089, - 0.19223114848136905, - -0.2260770797729492, - -0.210910826921463, - -1.0191949605941772, - 0.22356095910072327, - 0.09353553503751756, - 0.18096882104873657, - 0.14867214858531952, - 0.43408671021461487, - -0.33312076330184937, - 0.8173948526382446, - 0.6428242921829224, - 0.20215003192424777, - -0.6634518504142761, - -0.4132290482521057, - 0.29815030097961426, - -1.579406976699829, - -0.0981958732008934, - -0.03941014781594277, - 0.1709178239107132, - -0.5481140613555908, - -0.5338194966316223, - -0.3528362512588501, - -0.11561278253793716, - -0.21793591976165771, - -1.1570470333099363, - 0.2157980799674988, - 0.42083489894866943, - 0.9639263153076172, - 0.09747201204299928, - 0.15671424567699432, - 0.4034591615200043, - 0.6728067994117737, - -0.5216875672340393, - 0.09657668322324751, - -0.2416689097881317, - 0.747975766658783, - 0.1021689772605896, - 0.11652665585279463, - -1.0484966039657593, - 0.8489304780960083, - 0.7169828414916992, - -0.09012343734502792, - -1.3173753023147583, - 0.057890523225069046, - -0.006231260951608419, - -0.1018214002251625, - 0.936040461063385, - -0.0502331368625164, - -0.4284322261810303, - -0.38209280371665955, - -0.22668412327766416, - 0.0782942995429039, - -0.4881664514541626, - 0.9268959760665894, - 0.001867273123934865, - 0.42261114716529846, - 0.8283362984657288, - 0.4256294071674347, - -0.7965338826179504, - 0.4840078353881836, - -0.19861412048339844, - 0.33977967500686646, - -0.4604192078113556, - -0.3107339143753052, - -0.2839638590812683, - -1.5734281539916992, - 0.005220232997089624, - 0.09239906817674635, - -0.7828494906425476, - -0.1397123783826828, - 0.2576255202293396, - 0.21372435986995697, - -0.23169949650764465, - 0.4016408920288086, - -0.462497353553772, - -0.2186472862958908, - -0.5617868900299072, - -0.3649831712245941, - -1.1585862636566162, - -0.08222806453704834, - 0.931126832962036, - 0.4327389597892761, - -0.46451422572135925, - -0.5430706143379211, - -0.27434298396110535, - -0.9479129314422609, - 0.1845661848783493, - 0.3972720205783844, - 0.4883299469947815, - 1.04031240940094 + -0.05189208313822746, + -0.9273212552070618, + 0.1443813145160675, + 0.0932632014155388, + 0.2665371894836426, + 0.36266782879829407, + 0.6402910947799683, + 0.32014018297195435, + 0.030915971845388412, + -0.9312191605567932, + -0.3718109726905823, + -0.2700554132461548, + -1.1014580726623535, + 0.9154956936836244, + -0.3406888246536255, + 1.0077725648880005, + 0.6577560901641846, + -0.3955195546150207, + -0.4148270785808563, + 0.1855088472366333, + 0.5062315464019775, + -0.3632686734199524, + -0.2277890294790268, + 0.2560805082321167, + -0.3853609561920166, + -0.1604762226343155, + -0.13947471976280212, + -0.20147813856601715, + -0.4466346800327301, + -0.3761846721172333, + 0.1443382054567337, + 0.18205296993255615, + 0.49359792470932007, + -0.22538000345230105, + -0.4996317625045776, + -0.22734887897968292, + -0.6034309267997742, + -0.7857939600944519, + -0.34923747181892395, + -0.3466345965862274, + 0.21176661550998688, + -0.5101462006568909, + -0.3403083384037018, + 0.000315118464641273, + 0.236465722322464, + -0.10246097296476364, + -1.3013339042663574, + 0.3419138789176941, + -0.32963496446609497, + -0.0901619717478752, + -0.5426247119903564, + 0.22656650841236117, + -0.44758284091949463, + 0.14151698350906372, + -0.1089438870549202, + 0.5500766634941101, + -0.670711100101471, + -0.6227269768714905, + 0.3894464075565338, + -0.27609574794769287, + 0.7028202414512634, + -0.19697771966457367, + 0.328511506319046, + 0.5063360929489136, + 0.4065195322036743, + 0.2614171802997589, + -0.30274391174316406, + 1.0393824577331543, + -0.7742937207221985, + -0.7874112129211426, + -0.6749666929244995, + 0.5190866589546204, + 0.004123548045754433, + -0.28312963247299194, + -0.038731709122657776, + -1.0142987966537476, + -0.09519586712121964, + 0.8755272626876831, + 0.4876938760280609, + 0.7811151742935181, + 0.85174959897995, + 0.11826585978269576, + 0.5373436808586121, + 0.3649002015590668, + 0.19064077734947205, + -0.00287026260048151, + -0.7305403351783752, + -0.015206154435873032, + -0.7899249196052551, + 0.19407285749912265, + 0.08596625179052353, + -0.28976231813430786, + -0.1525907665491104, + 0.3798313438892365, + 0.050306469202041626, + -0.5697937607765198, + 0.4219021201133728, + 0.276252806186676, + 0.1559903472661972, + 0.10030482709407806, + -0.4043720066547394, + -0.1969818025827408, + 0.5739826560020447, + 0.2116064727306366, + -1.4620544910430908, + -0.7802462577819824, + -0.24739810824394223, + -0.09791352599859238, + -0.4413802027702331, + 0.21549351513385773, + -0.9520436525344848, + -0.08762510865926743, + 0.08154498040676117, + -0.6154940724372864, + -1.01079523563385, + 0.885427713394165, + 0.6967288851737976, + 0.27186504006385803, + -0.43194177746772766, + -0.11248451471328735, + 0.7576630711555481, + 0.4998855590820313, + 0.0264343973249197, + 0.9872855544090272, + 0.5634694695472717, + 0.053698331117630005, + 0.19410227239131927, + 0.3570743501186371, + -0.23670297861099243, + -0.9114483594894408, + 0.07884842902421951, + 0.7318344116210938, + 0.44630110263824463, + 0.08745364099740982, + -0.347101628780365, + -0.4314247667789459, + -0.5060274004936218, + 0.003706763498485088, + 0.44320008158683777, + -0.00788921769708395, + -0.1368623524904251, + -0.17391923069953918, + 0.14473655819892883, + 0.10927865654230118, + 0.6974599361419678, + 0.005052129738032818, + -0.016953065991401672, + -0.1256176233291626, + -0.036742497235536575, + 0.5591985583305359, + -0.37619709968566895, + 0.22429119050502777, + 0.5403043031692505, + -0.8603790998458862, + -0.3456307053565979, + 0.9292937517166138, + 0.5074859261512756, + 0.6310645937919617, + -0.3091641068458557, + 0.46902573108673096, + 0.7891915440559387, + 0.4499550759792328, + 0.2744995653629303, + 0.2712305784225464, + -0.04349074140191078, + -0.3638863265514374, + 0.7839881777763367, + 0.7352104783058167, + -0.19457511603832245, + -0.5957832932472229, + -0.43704694509506226, + -1.084769368171692, + 0.4904985725879669, + 0.5385226011276245, + 0.1891629993915558, + 0.12338479608297348, + 0.8315675258636475, + -0.07830192148685455, + 1.0916285514831543, + -0.28066861629486084, + -1.3585069179534912, + 0.5203898549079895, + 0.08678033947944641, + -0.2566044330596924, + 0.09484415501356123, + -0.0180208683013916, + 1.0264745950698853, + -0.023572135716676712, + 0.5864979028701782, + 0.7625196576118469, + -0.2543414533138275, + -0.8877770900726318, + 0.7611982822418213, + -0.06220436468720436, + 0.937336564064026, + 0.2704363465309143, + -0.37733694911003113, + 0.5076137781143188, + -0.30641937255859375, + 0.6252772808074951, + -0.0823579877614975, + -0.03736555948853493, + 0.4131673276424408, + -0.6514252424240112, + 0.12918265163898468, + -0.4483584463596344, + 0.6750786304473877, + -0.37008383870124817, + -0.02324833907186985, + 0.38027650117874146, + -0.26374951004981995, + 0.4346931278705597, + 0.42882832884788513, + -0.48798441886901855, + 1.1882442235946655, + 0.5132288336753845, + 0.5284568667411804, + -0.03538886830210686, + 0.29620853066444397, + -1.0683696269989014, + 0.25936177372932434, + 0.10404160618782043, + -0.25796034932136536, + 0.027896970510482788, + -0.09225251525640488, + 1.4811025857925415, + 0.641173779964447, + -0.13838383555412292, + -0.3437179923057556, + 0.5667019486427307, + -0.5400741696357727, + 0.31090837717056274, + 0.6470608115196228, + -0.3747067153453827, + -0.7364534735679626, + -0.07431528717279434, + 0.5173454880714417, + -0.6578747034072876, + 0.7107478976249695, + -0.7918999791145325, + -0.0648345872759819, + 0.609937846660614, + -0.7329513430595398, + 0.9741371870040894, + 0.17912346124649048, + -0.02658769302070141, + 0.5162150859832764, + -0.3978803157806397, + -0.7833885550498962, + -0.6497276425361633, + -0.3898126780986786, + -0.0952848568558693, + 0.2663288116455078, + -0.1604052186012268, + 0.373076468706131, + -0.8357769250869751, + -0.05217683315277099, + -0.2680160701274872, + 0.8389158248901367, + 0.6833611130714417, + -0.6712407469749451, + 0.7406917214393616, + -0.44522786140441895, + -0.34645363688468933, + -0.27384576201438904, + -0.9878405928611756, + -0.8166060447692871, + 0.06268279999494553, + 0.38567957282066345, + -0.3274703919887543, + 0.5296315550804138, + -0.11810623109340668, + 0.23029841482639313, + 0.08616159111261368, + -0.2195747196674347, + 0.09430307894945145, + 0.4057176411151886, + 0.4892159104347229, + -0.1636916548013687, + -0.6071445345878601, + 0.41256585717201233, + 0.622254490852356, + -0.41223976016044617, + -0.6686707139015198, + -0.7474371790885925, + -0.8509522080421448, + -0.16754287481307983, + -0.9078601002693176, + -0.29653599858283997, + -0.5020652413368225, + 0.4692700505256653, + 0.01281109917908907, + -0.16071580350399017, + 0.03388889133930206, + -0.020511148497462273, + 0.5027827024459839, + -0.20729811489582065, + 0.48107290267944336, + 0.33669769763946533, + -0.5275911688804626, + 0.48271527886390686, + 0.2738940715789795, + -0.033152539283037186, + -0.13629786670207977, + -0.05965912342071533, + -0.26200807094573975, + 0.04002794995903969, + -0.34095603227615356, + -3.986898899078369, + -0.46819332242012024, + -0.422744482755661, + -0.169097900390625, + 0.6008929014205933, + 0.058016058057546616, + -0.11401277780532836, + -0.3077819049358368, + -0.09595538675785063, + 0.6723822355270386, + 0.19367831945419312, + 0.28304359316825867, + 0.1609862744808197, + 0.7567598819732666, + 0.6889985799789429, + 0.06907720118761063, + -0.04188092052936554, + -0.7434936165809631, + 0.13321782648563385, + 0.8456063270568848, + -0.10364038497209548, + -0.45084846019744873, + -0.4758241474628449, + 0.43882066011428833, + -0.6432598829269409, + 0.7217311859130859, + -0.24189773201942444, + 0.12737572193145752, + -1.1008601188659668, + -0.3305315673351288, + 0.14614742994308472, + -0.7819333076477051, + 0.5287120342254639, + -0.055538054555654526, + 0.1877404749393463, + -0.6907662153244019, + 0.5616975426673889, + -0.4611121714115143, + -0.26109233498573303, + -0.12898315489292145, + -0.3724522292613983, + -0.7191406488418579, + -0.4425233602523804, + -0.644108235836029, + 0.8424481153488159, + 0.17532426118850708, + -0.5121750235557556, + -0.6467239260673523, + -0.0008507720194756985, + 0.7866212129592896, + -0.02644744887948036, + -0.005045140627771616, + 0.015782782807946205, + 0.16334445774555206, + -0.1913367658853531, + -0.13697923719882965, + -0.6684983372688293, + 0.18346354365348816, + -0.341105580329895, + 0.5427411198616028, + 0.3779832422733307, + -0.6778115034103394, + -0.2931850254535675, + -0.8805161714553833, + -0.4212774932384491, + -0.5368952751159668, + -1.3937891721725464, + -1.225494146347046, + 0.4276703894138336, + 1.1205668449401855, + -0.6005299687385559, + 0.15732505917549133, + -0.3914784789085388, + -1.357046604156494, + -0.4707142114639282, + -0.1497287154197693, + -0.25035548210144043, + -0.34328439831733704, + 0.39083412289619446, + 0.1623048633337021, + -0.9275814294815063, + -0.6430015563964844, + 0.2973862886428833, + 0.5580436587333679, + -0.6232585310935974, + -0.6611042022705078, + 0.4015969038009643, + -1.0232892036437988, + -0.2585645020008087, + -0.5431421399116516, + 0.5021264553070068, + -0.48601630330085754, + -0.010242084041237833, + 0.5862035155296326, + 0.7316920161247253, + 0.4036808013916016, + 0.4269520044326782, + -0.705938458442688, + 0.7747307419776917, + 0.10164368897676468, + 0.7887958884239197, + -0.9612497091293336, + 0.12755516171455383, + 0.06812842190265656, + -0.022603651508688927, + 0.14722754061222076, + -0.5588505268096924, + -0.20689940452575684, + 0.3557641804218292, + -0.6812759637832642, + 0.2860803008079529, + -0.38954633474349976, + 0.1759403496980667, + -0.5678874850273132, + -0.1692986786365509, + -0.14578519761562347, + 0.5711379051208496, + 1.0208125114440918, + 0.7759483456611633, + -0.372348427772522, + -0.5460885763168335, + 0.7190321683883667, + -0.6914990544319153, + 0.13365162909030914, + -0.4854792356491089, + 0.4054908752441406, + 0.4502798914909363, + -0.3041122555732727, + -0.06726965308189392, + -0.05570871382951737, + -0.0455719493329525, + 0.4785125255584717, + 0.8867972493171692, + 0.4107886850833893, + 0.6121342182159424, + -0.20477132499217987, + -0.5598517656326294, + -0.6443566679954529, + -0.5905212759971619, + -0.5571200251579285, + 0.17573799192905426, + -0.28621870279312134, + 0.1685224026441574, + 0.09719007462263109, + -0.04223639518022537, + -0.28623101115226746, + -0.1449810117483139, + -0.3789580464363098, + -0.5227636098861694, + -0.049728814512491226, + 0.7849089503288269, + 0.16792525351047516, + 0.9849340915679932, + -0.6559549570083618, + 0.35723909735679626, + -0.6822739243507385, + 1.2873116731643677, + 0.19993330538272855, + 0.03512010723352432, + -0.6972134113311768, + 0.18453484773635864, + -0.2437680810689926, + 0.2156416028738022, + 0.5230382680892944, + 0.22020135819911957, + 0.8314080238342285, + 0.15627102553844452, + -0.7330264449119568, + 0.3888184726238251, + -0.22034703195095065, + 0.5457669496536255, + -0.48084837198257446, + -0.45576658844947815, + -0.09287727624177931, + -0.06968110054731369, + 0.35125672817230225, + -0.4278119504451752, + 0.2038476765155792, + 0.11392722278833388, + 0.9433983564376832, + -0.4097744226455689, + 0.035297419875860214, + -0.4274404048919678, + -0.25100165605545044, + 1.0943366289138794, + -0.07634022831916809, + -0.2925529479980469, + -0.7512530088424683, + 0.2649727463722229, + -0.4078235328197479, + -0.3372223973274231, + 0.05190162733197212, + 0.005654910113662481, + -0.0001571219472680241, + -0.35445958375930786, + -0.7837416529655457, + 0.1500556766986847, + 0.4383024573326111, + 0.6099548935890198, + 0.05951934307813645, + -0.21325334906578064, + 0.0199207104742527, + -0.22704418003559113, + -0.6481077671051025, + 0.37442275881767273, + -1.015955924987793, + 0.38637226819992065, + -0.06489371508359909, + -0.494120329618454, + 0.3469836115837097, + 0.15402406454086304, + -0.7660972476005554, + -0.7053225040435791, + -0.25964751839637756, + 0.014004424214363098, + -0.2860170006752014, + -0.17565494775772095, + -0.45117494463920593, + -0.0031954257283359766, + 0.09676837921142578, + -0.514464259147644, + 0.41698193550109863, + -0.21642713248729703, + -0.5398141145706177, + -0.3647628426551819, + 0.37005379796028137, + 0.239425927400589, + -0.08833975344896317, + 0.934946596622467, + -0.48340797424316406, + 0.6241437792778015, + -0.7253676652908325, + -0.04303571209311485, + 1.1125205755233765, + -0.15692919492721558, + -0.2914651036262512, + -0.5117168426513672, + 0.21365483105182648, + 0.4924402534961701, + 0.5269662141799927, + 0.0352792888879776, + -0.149167999625206, + -0.6019760370254517, + 0.08245442807674408, + 0.4900692105293274, + 0.518824577331543, + -0.00005570516441366635, + -0.553304135799408, + 0.22217543423175812, + 0.5047767758369446, + 0.135724738240242, + 1.1511540412902832, + -0.3541218340396881, + -0.9712511897087096, + 0.8353699445724487, + -0.39227569103240967, + -0.9117669463157654, + -0.26349931955337524, + 0.05597023293375969, + 0.20695461332798004, + 0.3178807199001312, + 1.0663238763809204, + 0.5062212347984314, + 0.7288597822189331, + 0.09899299591779707, + 0.553720235824585, + 0.675009548664093, + -0.20067055523395536, + 0.3138423264026642, + -0.6886593103408813, + -0.2910398542881012, + -1.3186300992965698, + -0.4684459865093231, + -0.095743365585804, + -0.1257995069026947, + -0.4858281314373016, + -0.4935407340526581, + -0.3266896903514862, + -0.3928797245025635, + -0.40803104639053345, + -0.9975396394729614, + 0.4229583740234375, + 0.37309643626213074, + 0.4431034922599793, + 0.30364808440208435, + -0.3765178918838501, + 0.5616499185562134, + 0.16904796659946442, + -0.7343707084655762, + 0.2560209631919861, + 0.6166825294494629, + 0.3200829327106476, + -0.4483652710914612, + 0.16224201023578644, + -0.31495288014411926, + -0.42713335156440735, + 0.7270734906196594, + 0.7049484848976135, + -0.0571461021900177, + 0.04477125033736229, + -0.6647796034812927, + 1.183672308921814, + 0.36199676990509033, + 0.046881116926670074, + 0.4515796303749085, + 0.9278061985969543, + 0.31471705436706543, + -0.7073333859443665, + -0.3443860113620758, + 0.5440067052841187, + -0.15020819008350372, + -0.541202962398529, + 0.5203295946121216, + 1.2192286252975464, + -0.9983593225479126, + -0.18758884072303772, + 0.2758221924304962, + -0.6511523723602295, + -0.1584404855966568, + -0.236241415143013, + 0.2692437767982483, + -0.4941152036190033, + 0.4987454116344452, + -0.3331359028816223, + 0.3163745701313019, + 0.745529294013977, + -0.2905873656272888, + 0.13602906465530396, + 0.4679684340953827, + 1.0555986166000366, + 1.075700044631958, + 0.5368486046791077, + -0.5118206739425659, + 0.8668332099914551, + -0.5726966857910156, + -0.7811751961708069, + 0.1938626915216446, + -0.1929349899291992, + 0.1757766306400299, + 0.6384295225143433, + 0.26462844014167786, + 0.9542630314826964, + 0.19313029944896695, + 1.264248013496399, + -0.6304428577423096, + 0.0487106591463089, + -0.16211535036563873, + -0.7894763350486755, + 0.3582514822483063, + -0.04153040423989296, + 0.635784387588501, + 0.6554391980171204, + -0.47010496258735657, + -0.8302040696144104, + -0.1350124627351761, + 0.2568812072277069, + 0.13614831864833832, + -0.2563649117946625, + -1.0434694290161133, + 0.3232482671737671, + 0.47882452607154846, + 0.4298652410507202, + 1.0563770532608032, + -0.28917592763900757, + -0.8533256649971008, + 0.10648339986801147, + 0.6376127004623413, + -0.20832888782024384, + 0.2370245456695557, + 0.0018312990432605147, + -0.2034837007522583, + 0.01051164511591196, + -1.105310082435608, + 0.29724350571632385, + 0.15604574978351593, + 0.1973688006401062, + 0.44394731521606445, + 0.3974513411521912, + -0.13625948131084442, + 0.9571986198425292, + 0.2257384955883026, + 0.2323588728904724, + -0.5583669543266296, + -0.7854922413825989, + 0.1647188365459442, + -1.6098142862319946, + 0.318587988615036, + -0.13399995863437653, + -0.2172701060771942, + -0.767514705657959, + -0.5813586711883545, + -0.3195130527019501, + -0.04894036799669266, + 0.2929930090904236, + -0.8213384747505188, + 0.07181350141763687, + 0.7469993829727173, + 0.6407455801963806, + 0.16365697979927063, + 0.7870153188705444, + 0.6524736881256104, + 0.6399973630905151, + -0.04992736503481865, + -0.03959266096353531, + -0.2512352466583252, + 0.8448855876922607, + -0.1422702670097351, + 0.1216789186000824, + -1.2647287845611572, + 0.5931149125099182, + 0.7186052203178406, + -0.06118432432413101, + -1.1942816972732544, + -0.17677085101604462, + 0.31543800234794617, + -0.32252824306488037, + 0.8255583047866821, + -0.14529970288276672, + -0.2695446312427521, + -0.33378756046295166, + -0.1653425395488739, + 0.1454019844532013, + -0.3920115828514099, + 0.912214994430542, + -0.7279734015464783, + 0.7374742031097412, + 0.933980405330658, + 0.13429680466651917, + -0.514870285987854, + 0.3989711999893189, + -0.11613689363002776, + 0.4022413492202759, + -0.9990655779838562, + -0.33749932050704956, + -0.4334589838981629, + -1.376373291015625, + -0.2993924915790558, + -0.09454808384180068, + -0.01314175222069025, + -0.001090060803107917, + 0.2137461006641388, + 0.2938512861728668, + 0.17508235573768616, + 0.8260607123374939, + -0.7218498587608337, + 0.2414487451314926, + -0.47296759486198425, + -0.3002610504627228, + -1.238540768623352, + 0.08663805574178696, + 0.6805586218833923, + 0.5909030437469482, + -0.42807504534721375, + -0.22887496650218964, + 0.47537800669670105, + -1.0474627017974854, + 0.6338009238243103, + 0.06548397243022919, + 0.4971011281013489, + 1.3484878540039063 ] ], "regenerate": true diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap index 4bd0e2c3e..e5d28e450 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap @@ -3,778 +3,783 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e0", - "desc": "overriden vector", + "id": "e1", + "desc": "natural vector", "_vectors": { - "default": [ - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 - ] + "default": { + "embeddings": [ + [ + -0.2979458272457123, + -0.5288640856742859, + -0.019957859069108963, + -0.18495318293571472, + 0.7429973483085632, + 0.5238497257232666, + 0.432366281747818, + 0.32744166254997253, + 0.0020762972999364138, + -0.9507834911346436, + -0.35097137093544006, + 0.08469701558351517, + -1.4176613092422483, + 0.4647577106952667, + -0.69340580701828, + 1.0372896194458008, + 0.3716741800308227, + 0.06031008064746857, + -0.6152024269104004, + 0.007914665155112743, + 0.7954924702644348, + -0.20773003995418549, + 0.09376765787601472, + 0.04508133605122566, + -0.2084471583366394, + -0.1518009901046753, + 0.018195509910583496, + -0.07044368237257004, + -0.18119366466999057, + -0.4480230510234833, + 0.3822529911994934, + 0.1911812424659729, + 0.4674372375011444, + 0.06963984668254852, + -0.09341949224472046, + 0.005675444379448891, + -0.6774799227714539, + -0.7066726684570313, + -0.39256376028060913, + 0.04005039855837822, + 0.2084812968969345, + -0.7872875928878784, + -0.8205880522727966, + 0.2919981777667999, + -0.06004738807678223, + -0.4907574355602264, + -1.5937862396240234, + 0.24249385297298431, + -0.14709846675395966, + -0.11860740929841997, + -0.8299489617347717, + 0.472964346408844, + -0.497518390417099, + -0.22205302119255063, + -0.4196169078350067, + 0.32697558403015137, + -0.360930860042572, + -0.9789686799049376, + 0.1887447088956833, + -0.403737336397171, + 0.18524253368377688, + 0.3768732249736786, + 0.3666233420372009, + 0.3511938452720642, + 0.6985810995101929, + 0.41721710562705994, + 0.09754953533411026, + 0.6204307079315186, + -1.0762996673583984, + -0.06263761967420578, + -0.7376511693000793, + 0.6849768161773682, + -0.1745152473449707, + -0.40449759364128113, + 0.20757411420345304, + -0.8424443006515503, + 0.330015629529953, + 0.3489064872264862, + 1.0954371690750122, + 0.8487558960914612, + 1.1076823472976685, + 0.61430823802948, + 0.4155903458595276, + 0.4111340939998626, + 0.05753209814429283, + -0.06429877132177353, + -0.765606164932251, + -0.41703930497169495, + -0.508820652961731, + 0.19859947264194489, + -0.16607828438282013, + -0.28112146258354187, + 0.11032675206661224, + 0.38809511065483093, + -0.36498191952705383, + -0.48671194911003113, + 0.6755134463310242, + 0.03958442434668541, + 0.4478721618652344, + -0.10335399955511092, + -0.9546685814857484, + -0.6087718605995178, + 0.17498846352100372, + 0.08320838958024979, + -1.4478336572647097, + -0.605027437210083, + -0.5867993235588074, + -0.14711688458919525, + -0.5447602272033691, + -0.026259321719408035, + -0.6997418403625488, + -0.07349082082509995, + 0.10638900846242905, + -0.7133527398109436, + -0.9396815299987792, + 1.087092399597168, + 1.1885089874267578, + 0.4011896848678589, + -0.4089202582836151, + -0.10938972979784012, + 0.6726722121238708, + 0.24576938152313232, + -0.24247920513153076, + 1.1499971151351929, + 0.47813335061073303, + -0.05331678315997124, + 0.32338133454322815, + 0.4870913326740265, + -0.23144258558750153, + -1.2023426294326782, + 0.2349330335855484, + 1.080536961555481, + 0.29334118962287903, + 0.391574501991272, + -0.15818795561790466, + -0.2948290705680847, + -0.024689948186278343, + 0.06602869182825089, + 0.5937030911445618, + -0.047901444137096405, + -0.512734591960907, + -0.35780075192451477, + 0.28751692175865173, + 0.4298716187477112, + 0.9242428541183472, + -0.17208744585514069, + 0.11515070497989656, + -0.0335976779460907, + -0.3422986567020416, + 0.5344581604003906, + 0.19895796477794647, + 0.33001241087913513, + 0.6390730142593384, + -0.6074934005737305, + -0.2553696632385254, + 0.9644920229911804, + 0.2699219584465027, + 0.6403993368148804, + -0.6380003690719604, + -0.027310986071825027, + 0.638815701007843, + 0.27719101309776306, + -0.13553589582443237, + 0.750195324420929, + 0.1224869191646576, + -0.20613941550254825, + 0.8444448709487915, + 0.16200250387191772, + -0.24750925600528717, + -0.739950954914093, + -0.28443849086761475, + -1.176282525062561, + 0.516107976436615, + 0.3774825632572174, + 0.10906043648719788, + 0.07962015271186829, + 0.7384604215621948, + -0.051241904497146606, + 1.1730090379714966, + -0.4828610122203827, + -1.404372215270996, + 0.8811132311820984, + -0.3839482367038727, + 0.022516896948218346, + -0.0491158664226532, + -0.43027013540267944, + 1.2049334049224854, + -0.27309560775756836, + 0.6883630752563477, + 0.8264574408531189, + -0.5020735263824463, + -0.4874092042446137, + 0.6007202863693237, + -0.4965405762195587, + 1.1302915811538696, + 0.032572727650403976, + -0.3731859028339386, + 0.658271849155426, + -0.9023059010505676, + 0.7400162220001221, + 0.014550759457051754, + -0.19699542224407196, + 0.2319706380367279, + -0.789058268070221, + -0.14905710518360138, + -0.5826214551925659, + 0.207652747631073, + -0.4507439732551574, + -0.3163885474205017, + 0.3604124188423157, + -0.45119962096214294, + 0.3428427278995514, + 0.3005594313144684, + -0.36026081442832947, + 1.1014249324798584, + 0.40884315967559814, + 0.34991952776908875, + -0.1806638240814209, + 0.27440476417541504, + -0.7118373513221741, + 0.4645499587059021, + 0.214790478348732, + -0.2343102991580963, + 0.10500429570674896, + -0.28034430742263794, + 1.2267805337905884, + 1.0561333894729614, + -0.497364342212677, + -0.6143305897712708, + 0.24963727593421936, + -0.33136463165283203, + -0.01473914459347725, + 0.495918869972229, + -0.6985538005828857, + -1.0033197402954102, + 0.35937801003456116, + 0.6325868368148804, + -0.6808838844299316, + 1.0354058742523191, + -0.7214401960372925, + -0.33318862318992615, + 0.874398410320282, + -0.6594992280006409, + 0.6830640435218811, + -0.18534131348133087, + 0.024834271520376205, + 0.19901277124881744, + -0.5992477536201477, + -1.2126628160476685, + -0.9245557188987732, + -0.3898217976093292, + -0.1286519467830658, + 0.4217943847179413, + -0.1143646091222763, + 0.5630772709846497, + -0.5240639448165894, + 0.21152715384960177, + -0.3792001008987427, + 0.8266305327415466, + 1.170984387397766, + -0.8072142004966736, + 0.11382893472909927, + -0.17953898012638092, + -0.1789460331201553, + -0.15078622102737427, + -1.2082908153533936, + -0.7812382578849792, + -0.10903695970773696, + 0.7303897142410278, + -0.39054441452026367, + 0.19511254131793976, + -0.09121843427419662, + 0.22400228679180145, + 0.30143046379089355, + 0.1141919493675232, + 0.48112115263938904, + 0.7307931780815125, + 0.09701362252235413, + -0.2795647978782654, + -0.3997688889503479, + 0.5540812611579895, + 0.564578115940094, + -0.40065160393714905, + -0.3629159033298493, + -0.3789091110229492, + -0.7298538088798523, + -0.6996853351593018, + -0.4477842152118683, + -0.289089560508728, + -0.6430277824401855, + 0.2344944179058075, + 0.3742927014827728, + -0.5079357028007507, + 0.28841453790664673, + 0.06515737622976303, + 0.707315981388092, + 0.09498685598373412, + 0.8365515470504761, + 0.10002726316452026, + -0.7695478200912476, + 0.6264724135398865, + 0.7562043070793152, + -0.23112858831882477, + -0.2871039807796478, + -0.25010058283805847, + 0.2783474028110504, + -0.03224996477365494, + -0.9119359850883484, + -3.6940200328826904, + -0.5099936127662659, + -0.1604711413383484, + 0.17453284561634064, + 0.41759559512138367, + 0.1419190913438797, + -0.11362407356500626, + -0.33312007784843445, + 0.11511333286762238, + 0.4667884409427643, + -0.0031647447030991316, + 0.15879854559898376, + 0.3042248487472534, + 0.5404849052429199, + 0.8515422344207764, + 0.06286454200744629, + 0.43790125846862793, + -0.8682025074958801, + -0.06363756954669952, + 0.5547921657562256, + -0.01483887154608965, + -0.07361344993114471, + -0.929947018623352, + 0.3502565622329712, + -0.5080993175506592, + 1.0380364656448364, + -0.2017953395843506, + 0.21319580078125, + -1.0763001441955566, + -0.556368887424469, + 0.1949922740459442, + -0.6445739269256592, + 0.6791343688964844, + 0.21188358962535855, + 0.3736183941364288, + -0.21800459921360016, + 0.7597446441650391, + -0.3732394874095917, + -0.4710160195827484, + 0.025146087631583217, + 0.05341297015547752, + -0.9522109627723694, + -0.6000866889953613, + -0.08469046652317047, + 0.5966026186943054, + 0.3444081246852875, + -0.461188405752182, + -0.5279349088668823, + 0.10296865552663804, + 0.5175143480300903, + -0.20671147108078003, + 0.13392412662506104, + 0.4812754988670349, + 0.2993808686733246, + -0.3005635440349579, + 0.5141698122024536, + -0.6239235401153564, + 0.2877119481563568, + -0.4452739953994751, + 0.5621107816696167, + 0.5047508478164673, + -0.4226335883140564, + -0.18578553199768064, + -1.1967322826385498, + 0.28178197145462036, + -0.8692031502723694, + -1.1812998056411743, + -1.4526212215423584, + 0.4645712077617645, + 0.9327932000160216, + -0.6560136675834656, + 0.461549699306488, + -0.5621527433395386, + -1.328449010848999, + -0.08676894754171371, + 0.00021918353741057217, + -0.18864136934280396, + 0.1259666532278061, + 0.18240638077259064, + -0.14919660985469818, + -0.8965857625007629, + -0.7539900541305542, + 0.013973715715110302, + 0.504276692867279, + -0.704748272895813, + -0.6428424119949341, + 0.6303996443748474, + -0.5404738187789917, + -0.31176653504371643, + -0.21262824535369873, + 0.18736739456653595, + -0.7998970746994019, + 0.039946746081113815, + 0.7390344738960266, + 0.4283199906349182, + 0.3795057237148285, + 0.07204607129096985, + -0.9230587482452391, + 0.9440426230430604, + 0.26272690296173096, + 0.5598306655883789, + -1.0520871877670288, + -0.2677186131477356, + -0.1888762265443802, + 0.30426350235939026, + 0.4746131896972656, + -0.5746733546257019, + -0.4197768568992615, + 0.8565112948417664, + -0.6767723560333252, + 0.23448683321475983, + -0.2010004222393036, + 0.4112907350063324, + -0.6497949957847595, + -0.418667733669281, + -0.4950824975967407, + 0.44438859820365906, + 1.026281714439392, + 0.482397586107254, + -0.26220494508743286, + -0.3640787005424499, + 0.5907743573188782, + -0.8771642446517944, + 0.09708411991596222, + -0.3671700060367584, + 0.4331349730491638, + 0.619417667388916, + -0.2684665620326996, + -0.5123821496963501, + -0.1502324342727661, + -0.012190685607492924, + 0.3580845892429352, + 0.8617186546325684, + 0.3493645489215851, + 1.0270192623138428, + 0.18297909200191495, + -0.5881339311599731, + -0.1733516901731491, + -0.5040576457977295, + -0.340370237827301, + -0.26767754554748535, + -0.28570041060447693, + -0.032928116619586945, + 0.6029254794120789, + 0.17397655546665192, + 0.09346921741962431, + 0.27815181016921997, + -0.46699589490890503, + -0.8148876428604126, + -0.3964351713657379, + 0.3812595009803772, + 0.13547226786613464, + 0.7126688361167908, + -0.3473474085330963, + -0.06573959439992905, + -0.6483767032623291, + 1.4808889627456665, + 0.30924928188323975, + -0.5085946917533875, + -0.8613000512123108, + 0.3048902451992035, + -0.4241599142551422, + 0.15909206867218018, + 0.5764641761779785, + -0.07879110425710678, + 1.015336513519287, + 0.07599356025457382, + -0.7025855779647827, + 0.30047643184661865, + -0.35094937682151794, + 0.2522146999835968, + -0.2338722199201584, + -0.8326804637908936, + -0.13695412874221802, + -0.03452421352267265, + 0.47974953055381775, + -0.18385636806488037, + 0.32438594102859497, + 0.1797013282775879, + 0.787494957447052, + -0.12579888105392456, + -0.07507286965847015, + -0.4389670491218567, + 0.2720070779323578, + 0.8138866424560547, + 0.01974171027541161, + -0.3057698905467987, + -0.6709924936294556, + 0.0885881632566452, + -0.2862754464149475, + 0.03475658595561981, + -0.1285519152879715, + 0.3838353455066681, + -0.2944154739379883, + -0.4204859137535095, + -0.4416137933731079, + 0.13426260650157928, + 0.36733248829841614, + 0.573428750038147, + -0.14928072690963745, + -0.026076916605234143, + 0.33286052942276, + -0.5340145826339722, + -0.17279052734375, + -0.01154550164937973, + -0.6620771884918213, + 0.18390542268753052, + -0.08265615254640579, + -0.2489682286977768, + 0.2429984211921692, + -0.044153645634651184, + -0.986578404903412, + -0.33574509620666504, + -0.5387663841247559, + 0.19767941534519196, + 0.12540718913078308, + -0.3403128981590271, + -0.4154576361179352, + 0.17275673151016235, + 0.09407442808151244, + -0.5414086580276489, + 0.4393929839134216, + 0.1725579798221588, + -0.4998118281364441, + -0.6926208138465881, + 0.16552448272705078, + 0.6659538149833679, + -0.10949844866991044, + 0.986426830291748, + 0.01748848147690296, + 0.4003709554672241, + -0.5430638194084167, + 0.35347291827201843, + 0.6887399554252625, + 0.08274628221988678, + 0.13407137989997864, + -0.591465950012207, + 0.3446292281150818, + 0.6069018244743347, + 0.1935492902994156, + -0.0989871397614479, + 0.07008486241102219, + -0.8503749370574951, + -0.09507356584072112, + 0.6259510517120361, + 0.13934025168418884, + 0.06392545253038406, + -0.4112265408039093, + -0.08475656062364578, + 0.4974113404750824, + -0.30606114864349365, + 1.111435890197754, + -0.018766529858112335, + -0.8422622680664063, + 0.4325508773326874, + -0.2832120656967163, + -0.4859798848628998, + -0.41498348116874695, + 0.015977520495653152, + 0.5292825698852539, + 0.4538311660289765, + 1.1328668594360352, + 0.22632671892642975, + 0.7918671369552612, + 0.33401933312416077, + 0.7306135296821594, + 0.3548600673675537, + 0.12506209313869476, + 0.8573207855224609, + -0.5818327069282532, + -0.6953738927841187, + -1.6171947717666626, + -0.1699674427509308, + 0.6318262815475464, + -0.05671752244234085, + -0.28145185112953186, + -0.3976689279079437, + -0.2041076272726059, + -0.5495951175689697, + -0.5152917504310608, + -0.9309796094894408, + 0.101932130753994, + 0.1367802917957306, + 0.1490798443555832, + 0.5304336547851563, + -0.5082434415817261, + 0.06688683480024338, + 0.14657628536224365, + -0.782435953617096, + 0.2962816655635834, + 0.6965363621711731, + 0.8496337532997131, + -0.3042965829372406, + 0.04343798756599426, + 0.0330701619386673, + -0.5662598013877869, + 1.1086925268173218, + 0.756072998046875, + -0.204134538769722, + 0.2404300570487976, + -0.47848284244537354, + 1.3659011125564575, + 0.5645433068275452, + -0.15836156904697418, + 0.43395575881004333, + 0.5944653749465942, + 1.0043466091156006, + -0.49446743726730347, + -0.5954391360282898, + 0.5341240763664246, + 0.020598189905285835, + -0.4036853015422821, + 0.4473709762096405, + 1.1998231410980225, + -0.9317775368690492, + -0.23321466147899628, + 0.2052552700042725, + -0.7423108816146851, + -0.19917210936546328, + -0.1722569614648819, + -0.034072667360305786, + -0.00671181408688426, + 0.46396249532699585, + -0.1372445821762085, + 0.053376372903585434, + 0.7392690777778625, + -0.38447609543800354, + 0.07497968524694443, + 0.5197252631187439, + 1.3746477365493774, + 0.9060075879096984, + 0.20000585913658145, + -0.4053704142570496, + 0.7497360110282898, + -0.34087055921554565, + -1.101803183555603, + 0.273650586605072, + -0.5125769376754761, + 0.22472351789474487, + 0.480757474899292, + -0.19845178723335263, + 0.8857700824737549, + 0.30752456188201904, + 1.1109285354614258, + -0.6768012642860413, + 0.524367094039917, + -0.22495046257972717, + -0.4224412739276886, + 0.40753406286239624, + -0.23133376240730288, + 0.3297771215438843, + 0.4905449151992798, + -0.6813114285469055, + -0.7543983459472656, + -0.5599071383476257, + 0.14351597428321838, + -0.029278717935085297, + -0.3970443606376648, + -0.303079217672348, + 0.24161772429943085, + 0.008353390730917454, + -0.0062365154735744, + 1.0824860334396362, + -0.3704061508178711, + -1.0337258577346802, + 0.04638749733567238, + 1.163011074066162, + -0.31737643480300903, + 0.013986887410283089, + 0.19223114848136905, + -0.2260770797729492, + -0.210910826921463, + -1.0191949605941772, + 0.22356095910072327, + 0.09353553503751756, + 0.18096882104873657, + 0.14867214858531952, + 0.43408671021461487, + -0.33312076330184937, + 0.8173948526382446, + 0.6428242921829224, + 0.20215003192424777, + -0.6634518504142761, + -0.4132290482521057, + 0.29815030097961426, + -1.579406976699829, + -0.0981958732008934, + -0.03941014781594277, + 0.1709178239107132, + -0.5481140613555908, + -0.5338194966316223, + -0.3528362512588501, + -0.11561278253793716, + -0.21793591976165771, + -1.1570470333099363, + 0.2157980799674988, + 0.42083489894866943, + 0.9639263153076172, + 0.09747201204299928, + 0.15671424567699432, + 0.4034591615200043, + 0.6728067994117737, + -0.5216875672340393, + 0.09657668322324751, + -0.2416689097881317, + 0.747975766658783, + 0.1021689772605896, + 0.11652665585279463, + -1.0484966039657593, + 0.8489304780960083, + 0.7169828414916992, + -0.09012343734502792, + -1.3173753023147583, + 0.057890523225069046, + -0.006231260951608419, + -0.1018214002251625, + 0.936040461063385, + -0.0502331368625164, + -0.4284322261810303, + -0.38209280371665955, + -0.22668412327766416, + 0.0782942995429039, + -0.4881664514541626, + 0.9268959760665894, + 0.001867273123934865, + 0.42261114716529846, + 0.8283362984657288, + 0.4256294071674347, + -0.7965338826179504, + 0.4840078353881836, + -0.19861412048339844, + 0.33977967500686646, + -0.4604192078113556, + -0.3107339143753052, + -0.2839638590812683, + -1.5734281539916992, + 0.005220232997089624, + 0.09239906817674635, + -0.7828494906425476, + -0.1397123783826828, + 0.2576255202293396, + 0.21372435986995697, + -0.23169949650764465, + 0.4016408920288086, + -0.462497353553772, + -0.2186472862958908, + -0.5617868900299072, + -0.3649831712245941, + -1.1585862636566162, + -0.08222806453704834, + 0.931126832962036, + 0.4327389597892761, + -0.46451422572135925, + -0.5430706143379211, + -0.27434298396110535, + -0.9479129314422609, + 0.1845661848783493, + 0.3972720205783844, + 0.4883299469947815, + 1.04031240940094 + ] + ], + "regenerate": true + } } } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap new file mode 100644 index 000000000..4bd0e2c3e --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap @@ -0,0 +1,780 @@ +--- +source: dump/src/reader/mod.rs +expression: document +--- +{ + "id": "e0", + "desc": "overriden vector", + "_vectors": { + "default": [ + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ] + } +} diff --git a/dump/src/reader/v4/meta.rs b/dump/src/reader/v4/meta.rs index cec05f57c..2daea68a4 100644 --- a/dump/src/reader/v4/meta.rs +++ b/dump/src/reader/v4/meta.rs @@ -74,7 +74,8 @@ impl Display for IndexUidFormatError { f, "invalid index uid `{}`, the uid must be an integer \ or a string containing only alphanumeric characters \ - a-z A-Z 0-9, hyphens - and underscores _.", + a-z A-Z 0-9, hyphens - and underscores _, \ + and can not be more than 400 bytes.", self.invalid_uid, ) } diff --git a/dump/src/reader/v5/meta.rs b/dump/src/reader/v5/meta.rs index cec05f57c..2daea68a4 100644 --- a/dump/src/reader/v5/meta.rs +++ b/dump/src/reader/v5/meta.rs @@ -74,7 +74,8 @@ impl Display for IndexUidFormatError { f, "invalid index uid `{}`, the uid must be an integer \ or a string containing only alphanumeric characters \ - a-z A-Z 0-9, hyphens - and underscores _.", + a-z A-Z 0-9, hyphens - and underscores _, \ + and can not be more than 400 bytes.", self.invalid_uid, ) } diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index 679555a89..04b6dc266 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -27,6 +27,7 @@ pub enum Condition<'a> { LowerThanOrEqual(Token<'a>), Between { from: Token<'a>, to: Token<'a> }, Contains { keyword: Token<'a>, word: Token<'a> }, + StartsWith { keyword: Token<'a>, word: Token<'a> }, } /// condition = value ("==" | ">" ...) value @@ -121,6 +122,34 @@ pub fn parse_not_contains(input: Span) -> IResult { )) } +/// starts with = value "CONTAINS" value +pub fn parse_starts_with(input: Span) -> IResult { + let (input, (fid, starts_with, value)) = + tuple((parse_value, tag("STARTS WITH"), cut(parse_value)))(input)?; + Ok(( + input, + FilterCondition::Condition { + fid, + op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value }, + }, + )) +} + +/// starts with = value "NOT" WS+ "CONTAINS" value +pub fn parse_not_starts_with(input: Span) -> IResult { + let keyword = tuple((tag("NOT"), multispace1, tag("STARTS WITH"))); + let (input, (fid, (_not, _spaces, starts_with), value)) = + tuple((parse_value, keyword, cut(parse_value)))(input)?; + + Ok(( + input, + FilterCondition::Not(Box::new(FilterCondition::Condition { + fid, + op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value }, + })), + )) +} + /// to = value value "TO" WS+ value pub fn parse_to(input: Span) -> IResult { let (input, (key, from, _, _, to)) = diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index f530cc690..122396b87 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> { } ErrorKind::InvalidPrimary => { let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)? } ErrorKind::InvalidEscapedNumber => { writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)? diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index d06154f25..cfe009acb 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -49,7 +49,7 @@ use std::fmt::Debug; pub use condition::{parse_condition, parse_to, Condition}; use condition::{ parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, - parse_is_null, parse_not_contains, parse_not_exists, + parse_is_null, parse_not_contains, parse_not_exists, parse_not_starts_with, parse_starts_with, }; use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; @@ -166,7 +166,8 @@ impl<'a> FilterCondition<'a> { | Condition::LowerThan(_) | Condition::LowerThanOrEqual(_) | Condition::Between { .. } => None, - Condition::Contains { keyword, word: _ } => Some(keyword), + Condition::Contains { keyword, word: _ } + | Condition::StartsWith { keyword, word: _ } => Some(keyword), }, FilterCondition::Not(this) => this.use_contains_operator(), FilterCondition::Or(seq) | FilterCondition::And(seq) => { @@ -484,6 +485,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult { parse_to, parse_contains, parse_not_contains, + parse_starts_with, + parse_not_starts_with, // the next lines are only for error handling and are written at the end to have the less possible performance impact parse_geo, parse_geo_distance, @@ -567,6 +570,7 @@ impl<'a> std::fmt::Display for Condition<'a> { Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), Condition::Between { from, to } => write!(f, "{from} TO {to}"), Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"), + Condition::StartsWith { word, keyword: _ } => write!(f, "STARTS WITH {word}"), } } } @@ -680,6 +684,13 @@ pub mod tests { insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}"); insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})"); + // Test STARTS WITH + NOT STARTS WITH + insta::assert_snapshot!(p("subscribers STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}"); + insta::assert_snapshot!(p("NOT subscribers STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})"); + insta::assert_snapshot!(p("subscribers NOT STARTS WITH hel"), @"NOT ({subscribers} STARTS WITH {hel})"); + insta::assert_snapshot!(p("NOT subscribers NOT STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}"); + insta::assert_snapshot!(p("subscribers NOT STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})"); + // Test nested NOT insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); @@ -751,7 +762,7 @@ pub mod tests { "###); insta::assert_snapshot!(p("'OR'"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. 1:5 'OR' "###); @@ -761,12 +772,12 @@ pub mod tests { "###); insta::assert_snapshot!(p("channel Ponce"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. 1:14 channel Ponce "###); insta::assert_snapshot!(p("channel = Ponce OR"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. 19:19 channel = Ponce OR "###); @@ -851,12 +862,12 @@ pub mod tests { "###); insta::assert_snapshot!(p("colour NOT EXIST"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. 1:17 colour NOT EXIST "###); insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. 1:23 subscribers 100 TO1000 "###); @@ -919,35 +930,35 @@ pub mod tests { "###); insta::assert_snapshot!(p(r#"value NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. 1:11 value NULL "###); insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. 1:15 value NOT NULL "###); insta::assert_snapshot!(p(r#"value EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. 1:12 value EMPTY "###); insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. 1:16 value NOT EMPTY "###); insta::assert_snapshot!(p(r#"value IS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. 1:9 value IS "###); insta::assert_snapshot!(p(r#"value IS NOT"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. 1:13 value IS NOT "###); insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. 1:16 value IS EXISTS "###); insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. 1:20 value IS NOT EXISTS "###); } diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 06ec1daef..5912f6900 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -212,6 +212,8 @@ fn is_keyword(s: &str) -> bool { | "NULL" | "EMPTY" | "CONTAINS" + | "STARTS" + | "WITH" | "_geoRadius" | "_geoBoundingBox" ) diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 6f099a025..432a86382 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -40,7 +40,7 @@ ureq = "2.10.0" uuid = { version = "1.10.0", features = ["serde", "v4"] } [dev-dependencies] -arroy = "0.4.0" +arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.39.0", features = ["json", "redactions"] } diff --git a/index-scheduler/src/autobatcher.rs b/index-scheduler/src/autobatcher.rs index 96201bebb..0f6aa8a3a 100644 --- a/index-scheduler/src/autobatcher.rs +++ b/index-scheduler/src/autobatcher.rs @@ -25,8 +25,9 @@ enum AutobatchKind { primary_key: Option, }, DocumentEdition, - DocumentDeletion, - DocumentDeletionByFilter, + DocumentDeletion { + by_filter: bool, + }, DocumentClear, Settings { allow_index_creation: bool, @@ -65,10 +66,12 @@ impl From for AutobatchKind { .. } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key }, KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition, - KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion, + KindWithContent::DocumentDeletion { .. } => { + AutobatchKind::DocumentDeletion { by_filter: false } + } KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear, KindWithContent::DocumentDeletionByFilter { .. } => { - AutobatchKind::DocumentDeletionByFilter + AutobatchKind::DocumentDeletion { by_filter: true } } KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => { AutobatchKind::Settings { @@ -105,9 +108,7 @@ pub enum BatchKind { }, DocumentDeletion { deletion_ids: Vec, - }, - DocumentDeletionByFilter { - id: TaskId, + includes_by_filter: bool, }, ClearAndSettings { other: Vec, @@ -205,12 +206,13 @@ impl BatchKind { allow_index_creation, ), K::DocumentEdition => (Break(BatchKind::DocumentEdition { id: task_id }), false), - K::DocumentDeletion => { - (Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false) - } - K::DocumentDeletionByFilter => { - (Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false) - } + K::DocumentDeletion { by_filter: includes_by_filter } => ( + Continue(BatchKind::DocumentDeletion { + deletion_ids: vec![task_id], + includes_by_filter, + }), + false, + ), K::Settings { allow_index_creation } => ( Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }), allow_index_creation, @@ -228,7 +230,7 @@ impl BatchKind { match (self, kind) { // We don't batch any of these operations - (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition | K::DocumentDeletionByFilter) => Break(this), + (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentEdition) => Break(this), // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists. (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => { Break(this) @@ -264,7 +266,7 @@ impl BatchKind { // The index deletion can batch with everything but must stop after ( BatchKind::DocumentClear { mut ids } - | BatchKind::DocumentDeletion { deletion_ids: mut ids } + | BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ } | BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids } | BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids }, K::IndexDeletion, @@ -284,7 +286,7 @@ impl BatchKind { ( BatchKind::DocumentClear { mut ids }, - K::DocumentClear | K::DocumentDeletion, + K::DocumentClear | K::DocumentDeletion { by_filter: _ }, ) => { ids.push(id); Continue(BatchKind::DocumentClear { ids }) @@ -328,7 +330,7 @@ impl BatchKind { } ( BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids }, - K::DocumentDeletion, + K::DocumentDeletion { by_filter: false }, ) => { operation_ids.push(id); @@ -339,6 +341,13 @@ impl BatchKind { operation_ids, }) } + // We can't batch a document operation with a delete by filter + ( + this @ BatchKind::DocumentOperation { .. }, + K::DocumentDeletion { by_filter: true }, + ) => { + Break(this) + } // but we can't autobatch documents if it's not the same kind // this match branch MUST be AFTER the previous one ( @@ -357,13 +366,18 @@ impl BatchKind { operation_ids, }), - (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => { + (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => { deletion_ids.push(id); Continue(BatchKind::DocumentClear { ids: deletion_ids }) } + // we can't autobatch the deletion and import if the document deletion contained a filter + ( + this @ BatchKind::DocumentDeletion { deletion_ids: _, includes_by_filter: true }, + K::DocumentImport { .. } + ) => Break(this), // we can autobatch the deletion and import if the index already exists ( - BatchKind::DocumentDeletion { mut deletion_ids }, + BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, K::DocumentImport { method, allow_index_creation, primary_key } ) if index_already_exists => { deletion_ids.push(id); @@ -377,7 +391,7 @@ impl BatchKind { } // we can autobatch the deletion and import if both can't create an index ( - BatchKind::DocumentDeletion { mut deletion_ids }, + BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, K::DocumentImport { method, allow_index_creation, primary_key } ) if !allow_index_creation => { deletion_ids.push(id); @@ -396,9 +410,9 @@ impl BatchKind { ) => { Break(this) } - (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => { + (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter }, K::DocumentDeletion { by_filter }) => { deletion_ids.push(id); - Continue(BatchKind::DocumentDeletion { deletion_ids }) + Continue(BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: includes_by_filter | by_filter }) } (this @ BatchKind::DocumentDeletion { .. }, K::Settings { .. }) => Break(this), @@ -412,7 +426,7 @@ impl BatchKind { }), ( this @ BatchKind::Settings { .. }, - K::DocumentImport { .. } | K::DocumentDeletion, + K::DocumentImport { .. } | K::DocumentDeletion { .. }, ) => Break(this), ( BatchKind::Settings { mut settings_ids, allow_index_creation }, @@ -443,7 +457,7 @@ impl BatchKind { settings_ids, allow_index_creation, }, - K::DocumentDeletion, + K::DocumentDeletion { .. }, ) => { other.push(id); Continue(BatchKind::ClearAndSettings { @@ -505,7 +519,7 @@ impl BatchKind { // this MUST be AFTER the two previous branch ( this @ BatchKind::SettingsAndDocumentOperation { .. }, - K::DocumentDeletion | K::DocumentImport { .. }, + K::DocumentDeletion { .. } | K::DocumentImport { .. }, ) => Break(this), ( BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids }, @@ -525,8 +539,7 @@ impl BatchKind { | BatchKind::IndexDeletion { .. } | BatchKind::IndexUpdate { .. } | BatchKind::IndexSwap { .. } - | BatchKind::DocumentEdition { .. } - | BatchKind::DocumentDeletionByFilter { .. }, + | BatchKind::DocumentEdition { .. }, _, ) => { unreachable!() @@ -616,6 +629,13 @@ mod tests { } } + fn doc_del_fil() -> KindWithContent { + KindWithContent::DocumentDeletionByFilter { + index_uid: String::from("doggo"), + filter_expr: serde_json::json!("cuteness > 100"), + } + } + fn doc_clr() -> KindWithContent { KindWithContent::DocumentClear { index_uid: String::from("doggo") } } @@ -676,10 +696,16 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); // we can autobatch one or multiple DocumentDeletion together - debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del(), doc_del(), doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: false }, false))"); + + // we can autobatch one or multiple DocumentDeletionByFilter together + debug_snapshot!(autobatch_from(true, None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), doc_del_fil(), doc_del_fil()]), @"Some((DocumentDeletion { deletion_ids: [0, 1, 2], includes_by_filter: true }, false))"); // we can autobatch one or multiple Settings together debug_snapshot!(autobatch_from(true, None, [settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0] }, true))"); @@ -722,25 +748,63 @@ mod tests { debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + + // But we can't autobatch document addition with document deletion by filter + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + // And the other way around + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del_fil(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); } #[test] fn simple_document_operation_dont_autobatch_with_other() { - // addition, updates and deletion can't batch together + // addition, updates and deletion by filter can't batch together debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); } #[test] @@ -807,6 +871,7 @@ mod tests { debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); @@ -816,6 +881,7 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del()]), @"Some((IndexDeletion { ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_del_fil()]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [idx_del(), doc_clr()]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(true)]), @"Some((IndexDeletion { ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [idx_del(), settings(false)]), @"Some((IndexDeletion { ids: [0] }, false))"); @@ -827,6 +893,7 @@ mod tests { debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(true, None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); debug_snapshot!(autobatch_from(true, None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); @@ -836,6 +903,7 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false,None, [doc_del(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_del_fil(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); @@ -901,10 +969,10 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); // batch deletion and addition - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); } #[test] diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 3e6e78614..903ec1217 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -110,9 +110,9 @@ pub(crate) enum IndexOperation { index_uid: String, task: Task, }, - IndexDocumentDeletionByFilter { + DocumentDeletion { index_uid: String, - task: Task, + tasks: Vec, }, DocumentClear { index_uid: String, @@ -165,11 +165,11 @@ impl Batch { Batch::IndexOperation { op, .. } => match op { IndexOperation::DocumentOperation { tasks, .. } | IndexOperation::Settings { tasks, .. } + | IndexOperation::DocumentDeletion { tasks, .. } | IndexOperation::DocumentClear { tasks, .. } => { RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid)) } - IndexOperation::DocumentEdition { task, .. } - | IndexOperation::IndexDocumentDeletionByFilter { task, .. } => { + IndexOperation::DocumentEdition { task, .. } => { RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() } IndexOperation::SettingsAndDocumentOperation { @@ -234,7 +234,7 @@ impl IndexOperation { match self { IndexOperation::DocumentOperation { index_uid, .. } | IndexOperation::DocumentEdition { index_uid, .. } - | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. } + | IndexOperation::DocumentDeletion { index_uid, .. } | IndexOperation::DocumentClear { index_uid, .. } | IndexOperation::Settings { index_uid, .. } | IndexOperation::DocumentClearAndSetting { index_uid, .. } @@ -252,8 +252,8 @@ impl fmt::Display for IndexOperation { IndexOperation::DocumentEdition { .. } => { f.write_str("IndexOperation::DocumentEdition") } - IndexOperation::IndexDocumentDeletionByFilter { .. } => { - f.write_str("IndexOperation::IndexDocumentDeletionByFilter") + IndexOperation::DocumentDeletion { .. } => { + f.write_str("IndexOperation::DocumentDeletion") } IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"), IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"), @@ -289,21 +289,6 @@ impl IndexScheduler { }, must_create_index, })), - BatchKind::DocumentDeletionByFilter { id } => { - let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; - match &task.kind { - KindWithContent::DocumentDeletionByFilter { index_uid, .. } => { - Ok(Some(Batch::IndexOperation { - op: IndexOperation::IndexDocumentDeletionByFilter { - index_uid: index_uid.clone(), - task, - }, - must_create_index: false, - })) - } - _ => unreachable!(), - } - } BatchKind::DocumentEdition { id } => { let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; match &task.kind { @@ -366,30 +351,11 @@ impl IndexScheduler { must_create_index, })) } - BatchKind::DocumentDeletion { deletion_ids } => { + BatchKind::DocumentDeletion { deletion_ids, includes_by_filter: _ } => { let tasks = self.get_existing_tasks(rtxn, deletion_ids)?; - let mut operations = Vec::with_capacity(tasks.len()); - let mut documents_counts = Vec::with_capacity(tasks.len()); - for task in &tasks { - match task.kind { - KindWithContent::DocumentDeletion { ref documents_ids, .. } => { - operations.push(DocumentOperation::Delete(documents_ids.clone())); - documents_counts.push(documents_ids.len() as u64); - } - _ => unreachable!(), - } - } - Ok(Some(Batch::IndexOperation { - op: IndexOperation::DocumentOperation { - index_uid, - primary_key: None, - method: IndexDocumentsMethod::ReplaceDocuments, - documents_counts, - operations, - tasks, - }, + op: IndexOperation::DocumentDeletion { index_uid, tasks }, must_create_index, })) } @@ -1439,7 +1405,7 @@ impl IndexScheduler { { (original_filter, context, function) } else { - // In the case of a `documentDeleteByFilter` the details MUST be set + // In the case of a `documentEdition` the details MUST be set unreachable!(); }; @@ -1469,52 +1435,102 @@ impl IndexScheduler { Ok(vec![task]) } - IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => { - let filter = - if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } = - &task.kind - { - filter_expr - } else { - unreachable!() - }; - let deleted_documents = delete_document_by_filter( - index_wtxn, - filter, - self.index_mapper.indexer_config(), - self.must_stop_processing.clone(), - index, - ); - let original_filter = if let Some(Details::DocumentDeletionByFilter { - original_filter, - deleted_documents: _, - }) = task.details - { - original_filter - } else { - // In the case of a `documentDeleteByFilter` the details MUST be set - unreachable!(); - }; + IndexOperation::DocumentDeletion { mut tasks, index_uid: _ } => { + let mut to_delete = RoaringBitmap::new(); + let external_documents_ids = index.external_documents_ids(); - match deleted_documents { - Ok(deleted_documents) => { - task.status = Status::Succeeded; - task.details = Some(Details::DocumentDeletionByFilter { - original_filter, - deleted_documents: Some(deleted_documents), - }); - } - Err(e) => { - task.status = Status::Failed; - task.details = Some(Details::DocumentDeletionByFilter { - original_filter, - deleted_documents: Some(0), - }); - task.error = Some(e.into()); + for task in tasks.iter_mut() { + let before = to_delete.len(); + task.status = Status::Succeeded; + + match &task.kind { + KindWithContent::DocumentDeletion { index_uid: _, documents_ids } => { + for id in documents_ids { + if let Some(id) = external_documents_ids.get(index_wtxn, id)? { + to_delete.insert(id); + } + } + let will_be_removed = to_delete.len() - before; + task.details = Some(Details::DocumentDeletion { + provided_ids: documents_ids.len(), + deleted_documents: Some(will_be_removed), + }); + } + KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => { + let before = to_delete.len(); + let filter = match Filter::from_json(filter_expr) { + Ok(filter) => filter, + Err(err) => { + // theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens + task.status = Status::Failed; + task.error = match err { + milli::Error::UserError( + milli::UserError::InvalidFilterExpression { .. }, + ) => Some( + Error::from(err) + .with_custom_error_code(Code::InvalidDocumentFilter) + .into(), + ), + e => Some(e.into()), + }; + None + } + }; + if let Some(filter) = filter { + let candidates = + filter.evaluate(index_wtxn, index).map_err(|err| match err { + milli::Error::UserError( + milli::UserError::InvalidFilter(_), + ) => Error::from(err) + .with_custom_error_code(Code::InvalidDocumentFilter), + e => e.into(), + }); + match candidates { + Ok(candidates) => to_delete |= candidates, + Err(err) => { + task.status = Status::Failed; + task.error = Some(err.into()); + } + }; + } + let will_be_removed = to_delete.len() - before; + if let Some(Details::DocumentDeletionByFilter { + original_filter: _, + deleted_documents, + }) = &mut task.details + { + *deleted_documents = Some(will_be_removed); + } else { + // In the case of a `documentDeleteByFilter` the details MUST be set + unreachable!() + } + } + _ => unreachable!(), } } - Ok(vec![task]) + let config = IndexDocumentsConfig { + update_method: IndexDocumentsMethod::ReplaceDocuments, + ..Default::default() + }; + + let must_stop_processing = self.must_stop_processing.clone(); + let mut builder = milli::update::IndexDocuments::new( + index_wtxn, + index, + self.index_mapper.indexer_config(), + config, + |indexing_step| tracing::debug!(update = ?indexing_step), + || must_stop_processing.get(), + )?; + + let (new_builder, _count) = + builder.remove_documents_from_db_no_batch(&to_delete)?; + builder = new_builder; + + let _ = builder.execute()?; + + Ok(tasks) } IndexOperation::Settings { index_uid: _, settings, mut tasks } => { let indexer_config = self.index_mapper.indexer_config(); @@ -1718,46 +1734,6 @@ impl IndexScheduler { } } -fn delete_document_by_filter<'a>( - wtxn: &mut RwTxn<'a>, - filter: &serde_json::Value, - indexer_config: &IndexerConfig, - must_stop_processing: MustStopProcessing, - index: &'a Index, -) -> Result { - let filter = Filter::from_json(filter)?; - Ok(if let Some(filter) = filter { - let candidates = filter.evaluate(wtxn, index).map_err(|err| match err { - milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { - Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter) - } - e => e.into(), - })?; - - let config = IndexDocumentsConfig { - update_method: IndexDocumentsMethod::ReplaceDocuments, - ..Default::default() - }; - - let mut builder = milli::update::IndexDocuments::new( - wtxn, - index, - indexer_config, - config, - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - )?; - - let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?; - builder = new_builder; - - let _ = builder.execute()?; - count - } else { - 0 - }) -} - fn edit_documents_by_function<'a>( wtxn: &mut RwTxn<'a>, filter: &Option, diff --git a/index-scheduler/src/error.rs b/index-scheduler/src/error.rs index 223b84762..3bd378fd6 100644 --- a/index-scheduler/src/error.rs +++ b/index-scheduler/src/error.rs @@ -101,7 +101,7 @@ pub enum Error { )] InvalidTaskCanceledBy { canceled_by: String }, #[error( - "{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_)." + "{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes." )] InvalidIndexUid { index_uid: String }, #[error("Task `{0}` not found.")] diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index c998ff444..f4ac80511 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -87,7 +87,7 @@ impl RoFeatures { Ok(()) } else { Err(FeatureNotEnabledError { - disabled_action: "Using `CONTAINS` in a filter", + disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter", feature: "contains filter", issue_link: "https://github.com/orgs/meilisearch/discussions/763", } diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 21e503567..fe8244f9b 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1477,7 +1477,7 @@ impl IndexScheduler { .map( |IndexEmbeddingConfig { name, - config: milli::vector::EmbeddingConfig { embedder_options, prompt }, + config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized }, .. }| { let prompt = @@ -1486,7 +1486,10 @@ impl IndexScheduler { { let embedders = self.embedders.read().unwrap(); if let Some(embedder) = embedders.get(&embedder_options) { - return Ok((name, (embedder.clone(), prompt))); + return Ok(( + name, + (embedder.clone(), prompt, quantized.unwrap_or_default()), + )); } } @@ -1500,7 +1503,7 @@ impl IndexScheduler { let mut embedders = self.embedders.write().unwrap(); embedders.insert(embedder_options, embedder.clone()); } - Ok((name, (embedder, prompt))) + Ok((name, (embedder, prompt, quantized.unwrap_or_default()))) }, ) .collect(); @@ -1764,6 +1767,7 @@ mod tests { use crossbeam::channel::RecvTimeoutError; use file_store::File; use insta::assert_json_snapshot; + use maplit::btreeset; use meili_snap::{json_string, snapshot}; use meilisearch_auth::AuthFilter; use meilisearch_types::document_formats::DocumentFormatError; @@ -2553,6 +2557,117 @@ mod tests { snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents"); } + #[test] + fn fail_in_process_batch_for_document_deletion() { + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + use meilisearch_types::settings::{Settings, Unchecked}; + let mut new_settings: Box> = Box::default(); + new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto"))); + + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings, + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + + let content = r#"[ + { "id": 1, "doggo": "jean bob" }, + { "id": 2, "catto": "jorts" }, + { "id": 3, "doggo": "bork" } + ]"#; + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap(); + let documents_count = read_json(content.as_bytes(), &mut file).unwrap(); + file.persist().unwrap(); + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: Some(S("id")), + method: ReplaceDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_setting_and_document_addition"); + + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_settings"); + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_adding_the_documents"); + + index_scheduler + .register( + KindWithContent::DocumentDeletion { + index_uid: S("doggos"), + documents_ids: vec![S("1")], + }, + None, + false, + ) + .unwrap(); + // This one should not be catched by Meilisearch but it's still nice to handle it because if one day we break the filters it could happens + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!(true), + }, + None, + false, + ) + .unwrap(); + // Should fail because the ids are not filterable + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!("id = 2"), + }, + None, + false, + ) + .unwrap(); + index_scheduler + .register( + KindWithContent::DocumentDeletionByFilter { + index_uid: S("doggos"), + filter_expr: serde_json::json!("catto EXISTS"), + }, + None, + false, + ) + .unwrap(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_document_deletions"); + + // Everything should be batched together + handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_removing_the_documents"); + + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + let field_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let field_ids = field_ids_map.ids().collect::>(); + let documents = index + .all_documents(&rtxn) + .unwrap() + .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap()) + .collect::>(); + snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork"); + } + #[test] fn do_not_batch_task_of_different_indexes() { let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); @@ -5085,7 +5200,7 @@ mod tests { let simple_hf_name = name.clone(); let configs = index_scheduler.embedders(configs).unwrap(); - let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap(); + let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap(); let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap(); let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap(); @@ -5403,7 +5518,11 @@ mod tests { ), prompt: PromptData { template: "{{doc.doggo}}", + max_bytes: Some( + 400, + ), }, + quantized: None, }, user_provided: RoaringBitmap<[1, 2]>, }, @@ -5416,28 +5535,8 @@ mod tests { // the document with the id 3 should keep its original embedding let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); - let mut embeddings = Vec::new(); - - 'vectors: for i in 0..=u8::MAX { - let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e), - }) - .transpose(); - - let Some(reader) = reader else { - break 'vectors; - }; - - let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap(); - if let Some(embedding) = embedding { - embeddings.push(embedding) - } else { - break 'vectors; - } - } + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embeddings = &embeddings["my_doggo_embedder"]; snapshot!(embeddings.len(), @"1"); assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); @@ -5617,8 +5716,12 @@ mod tests { }, ), prompt: PromptData { - template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", + template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), }, + quantized: None, }, user_provided: RoaringBitmap<[0]>, }, @@ -5657,8 +5760,12 @@ mod tests { }, ), prompt: PromptData { - template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", + template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + max_bytes: Some( + 400, + ), }, + quantized: None, }, user_provided: RoaringBitmap<[]>, }, diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap new file mode 100644 index 000000000..62e634bc5 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap new file mode 100644 index 000000000..45065d8b1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 0, field_distribution: {} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap new file mode 100644 index 000000000..45065d8b1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings_and_documents.snap @@ -0,0 +1,43 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 0, field_distribution: {} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap new file mode 100644 index 000000000..82748751e --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -0,0 +1,56 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} +3 {uid: 3, status: failed, error: ResponseError { code: 200, message: "Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} +4 {uid: 4, status: failed, error: ResponseError { code: 200, message: "Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} +5 {uid: 5, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,5,] +failed [3,4,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [2,3,4,5,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,3,4,5,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,3,4,5,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap new file mode 100644 index 000000000..2b56b71d1 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/documents_remaining_should_only_be_bork.snap @@ -0,0 +1,9 @@ +--- +source: index-scheduler/src/lib.rs +--- +[ + { + "id": 3, + "doggo": "bork" + } +] diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap new file mode 100644 index 000000000..502ff0806 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap @@ -0,0 +1,53 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} +3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} +4 {uid: 4, status: enqueued, details: { original_filter: "id = 2", deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} +5 {uid: 5, status: enqueued, details: { original_filter: "catto EXISTS", deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }} +---------------------------------------------------------------------- +### Status: +enqueued [2,3,4,5,] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"documentDeletion" [2,3,4,5,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,3,4,5,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +[timestamp] [3,] +[timestamp] [4,] +[timestamp] [5,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap new file mode 100644 index 000000000..f7e5c35d3 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap @@ -0,0 +1,39 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap index add94c403..41cfcfdab 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap index 2c2e986a6..e6d0d8232 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap index 32cd22281..bd4cf0c09 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap index adf7a06a6..746c7c870 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap index 30d71a7f5..15cfd732a 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap index 163d23aac..9b5c6ce4c 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap index 8bd4d7739..37f0a062d 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap index 942e0b89f..3906fc6fc 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 3aae58b2b..55d31b8cf 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -67,3 +67,5 @@ khmer = ["milli/khmer"] vietnamese = ["milli/vietnamese"] # force swedish character recomposition swedish-recomposition = ["milli/swedish-recomposition"] +# force german character recomposition +german = ["milli/german"] diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 4d80fe9c9..514ed18c3 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -238,8 +238,14 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; @@ -388,7 +394,11 @@ impl ErrorCode for milli::Error { | UserError::InvalidOpenAiModelDimensionsMax { .. } | UserError::InvalidSettingsDimensions { .. } | UserError::InvalidUrl { .. } - | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, + | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } + | UserError::InvalidPrompt(_) + | UserError::InvalidDisableBinaryQuantization { .. } => { + Code::InvalidSettingsEmbedders + } UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, @@ -533,7 +543,8 @@ impl fmt::Display for deserr_codes::InvalidSimilarId { f, "the value of `id` is invalid. \ A document identifier can be of type integer or string, \ - only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)." + only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \ + and can not be more than 512 bytes." ) } } diff --git a/meilisearch-types/src/index_uid.rs b/meilisearch-types/src/index_uid.rs index 341ab02cb..03a31a82f 100644 --- a/meilisearch-types/src/index_uid.rs +++ b/meilisearch-types/src/index_uid.rs @@ -1,3 +1,4 @@ +use std::borrow::Borrow; use std::error::Error; use std::fmt; use std::str::FromStr; @@ -8,7 +9,7 @@ use crate::error::{Code, ErrorCode}; /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long -#[derive(Debug, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)] #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] pub struct IndexUid(String); @@ -70,6 +71,12 @@ impl From for String { } } +impl Borrow for IndexUid { + fn borrow(&self) -> &String { + &self.0 + } +} + #[derive(Debug)] pub struct IndexUidFormatError { pub invalid_uid: String, @@ -81,7 +88,8 @@ impl fmt::Display for IndexUidFormatError { f, "`{}` is not a valid index uid. Index uid can be an \ integer or a string containing only alphanumeric \ - characters, hyphens (-) and underscores (_).", + characters, hyphens (-) and underscores (_), \ + and can not be more than 512 bytes.", self.invalid_uid, ) } diff --git a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs index c6902dd71..8d746779e 100644 --- a/meilisearch-types/src/locales.rs +++ b/meilisearch-types/src/locales.rs @@ -1,134 +1,6 @@ use deserr::Deserr; use milli::LocalizedAttributesRule; use serde::{Deserialize, Serialize}; -use serde_json::json; - -/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language. -/// -/// this enum implements `Deserr` in order to be used in the API. -macro_rules! make_locale { - - ($($language:tt), +) => { - #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)] - #[deserr(rename_all = camelCase)] - #[serde(rename_all = "camelCase")] - pub enum Locale { - $($language),+, - } - - impl From for Locale { - fn from(other: milli::tokenizer::Language) -> Locale { - match other { - $(milli::tokenizer::Language::$language => Locale::$language), + - } - } - } - - impl From for milli::tokenizer::Language { - fn from(other: Locale) -> milli::tokenizer::Language { - match other { - $(Locale::$language => milli::tokenizer::Language::$language), +, - } - } - } - - #[derive(Debug)] - pub struct LocaleFormatError { - pub invalid_locale: String, - } - - impl std::fmt::Display for LocaleFormatError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let valid_locales = [$(Locale::$language),+].iter().map(|l| format!("`{}`", json!(l).as_str().unwrap())).collect::>().join(", "); - write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) - } - } - }; -} - -make_locale! { - Epo, - Eng, - Rus, - Cmn, - Spa, - Por, - Ita, - Ben, - Fra, - Deu, - Ukr, - Kat, - Ara, - Hin, - Jpn, - Heb, - Yid, - Pol, - Amh, - Jav, - Kor, - Nob, - Dan, - Swe, - Fin, - Tur, - Nld, - Hun, - Ces, - Ell, - Bul, - Bel, - Mar, - Kan, - Ron, - Slv, - Hrv, - Srp, - Mkd, - Lit, - Lav, - Est, - Tam, - Vie, - Urd, - Tha, - Guj, - Uzb, - Pan, - Aze, - Ind, - Tel, - Pes, - Mal, - Ori, - Mya, - Nep, - Sin, - Khm, - Tuk, - Aka, - Zul, - Sna, - Afr, - Lat, - Slk, - Cat, - Tgl, - Hye -} - -impl std::error::Error for LocaleFormatError {} - -impl std::str::FromStr for Locale { - type Err = LocaleFormatError; - - fn from_str(s: &str) -> Result { - milli::tokenizer::Language::from_code(s) - .map(Self::from) - .ok_or(LocaleFormatError { invalid_locale: s.to_string() }) - } -} #[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)] #[deserr(rename_all = camelCase)] @@ -155,3 +27,140 @@ impl From for LocalizedAttributesRule { } } } + +/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language. +/// +/// this enum implements `Deserr` in order to be used in the API. +macro_rules! make_locale { + ($(($iso_639_1:ident, $iso_639_1_str:expr) => ($iso_639_3:ident, $iso_639_3_str:expr),)+) => { + #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)] + #[deserr(rename_all = camelCase)] + #[serde(rename_all = "camelCase")] + pub enum Locale { + $($iso_639_1,)+ + $($iso_639_3,)+ + Cmn, + } + + impl From for Locale { + fn from(other: milli::tokenizer::Language) -> Locale { + match other { + $(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+ + milli::tokenizer::Language::Cmn => Locale::Cmn, + } + } + } + + impl From for milli::tokenizer::Language { + fn from(other: Locale) -> milli::tokenizer::Language { + match other { + $(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+ + $(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+ + Locale::Cmn => milli::tokenizer::Language::Cmn, + } + } + } + + impl std::str::FromStr for Locale { + type Err = LocaleFormatError; + + fn from_str(s: &str) -> Result { + let locale = match s { + $($iso_639_1_str => Locale::$iso_639_1,)+ + $($iso_639_3_str => Locale::$iso_639_3,)+ + "cmn" => Locale::Cmn, + _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), + }; + + Ok(locale) + } + } + + #[derive(Debug)] + pub struct LocaleFormatError { + pub invalid_locale: String, + } + + impl std::fmt::Display for LocaleFormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"]; + valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right))); + write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", ")) + } + } + + impl std::error::Error for LocaleFormatError {} + }; +} + +make_locale!( + (Af, "af") => (Afr, "afr"), + (Ak, "ak") => (Aka, "aka"), + (Am, "am") => (Amh, "amh"), + (Ar, "ar") => (Ara, "ara"), + (Az, "az") => (Aze, "aze"), + (Be, "be") => (Bel, "bel"), + (Bn, "bn") => (Ben, "ben"), + (Bg, "bg") => (Bul, "bul"), + (Ca, "ca") => (Cat, "cat"), + (Cs, "cs") => (Ces, "ces"), + (Da, "da") => (Dan, "dan"), + (De, "de") => (Deu, "deu"), + (El, "el") => (Ell, "ell"), + (En, "en") => (Eng, "eng"), + (Eo, "eo") => (Epo, "epo"), + (Et, "et") => (Est, "est"), + (Fi, "fi") => (Fin, "fin"), + (Fr, "fr") => (Fra, "fra"), + (Gu, "gu") => (Guj, "guj"), + (He, "he") => (Heb, "heb"), + (Hi, "hi") => (Hin, "hin"), + (Hr, "hr") => (Hrv, "hrv"), + (Hu, "hu") => (Hun, "hun"), + (Hy, "hy") => (Hye, "hye"), + (Id, "id") => (Ind, "ind"), + (It, "it") => (Ita, "ita"), + (Jv, "jv") => (Jav, "jav"), + (Ja, "ja") => (Jpn, "jpn"), + (Kn, "kn") => (Kan, "kan"), + (Ka, "ka") => (Kat, "kat"), + (Km, "km") => (Khm, "khm"), + (Ko, "ko") => (Kor, "kor"), + (La, "la") => (Lat, "lat"), + (Lv, "lv") => (Lav, "lav"), + (Lt, "lt") => (Lit, "lit"), + (Ml, "ml") => (Mal, "mal"), + (Mr, "mr") => (Mar, "mar"), + (Mk, "mk") => (Mkd, "mkd"), + (My, "my") => (Mya, "mya"), + (Ne, "ne") => (Nep, "nep"), + (Nl, "nl") => (Nld, "nld"), + (Nb, "nb") => (Nob, "nob"), + (Or, "or") => (Ori, "ori"), + (Pa, "pa") => (Pan, "pan"), + (Fa, "fa") => (Pes, "pes"), + (Pl, "pl") => (Pol, "pol"), + (Pt, "pt") => (Por, "por"), + (Ro, "ro") => (Ron, "ron"), + (Ru, "ru") => (Rus, "rus"), + (Si, "si") => (Sin, "sin"), + (Sk, "sk") => (Slk, "slk"), + (Sl, "sl") => (Slv, "slv"), + (Sn, "sn") => (Sna, "sna"), + (Es, "es") => (Spa, "spa"), + (Sr, "sr") => (Srp, "srp"), + (Sv, "sv") => (Swe, "swe"), + (Ta, "ta") => (Tam, "tam"), + (Te, "te") => (Tel, "tel"), + (Tl, "tl") => (Tgl, "tgl"), + (Th, "th") => (Tha, "tha"), + (Tk, "tk") => (Tuk, "tuk"), + (Tr, "tr") => (Tur, "tur"), + (Uk, "uk") => (Ukr, "ukr"), + (Ur, "ur") => (Urd, "urd"), + (Uz, "uz") => (Uzb, "uzb"), + (Vi, "vi") => (Vie, "vie"), + (Yi, "yi") => (Yid, "yid"), + (Zh, "zh") => (Zho, "zho"), + (Zu, "zu") => (Zul, "zul"), +); diff --git a/meilisearch-types/src/versioning.rs b/meilisearch-types/src/versioning.rs index 3c4726403..2ec9d9b0c 100644 --- a/meilisearch-types/src/versioning.rs +++ b/meilisearch-types/src/versioning.rs @@ -10,38 +10,52 @@ static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR"); static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH"); /// Persists the version of the current Meilisearch binary to a VERSION file -pub fn create_version_file(db_path: &Path) -> io::Result<()> { +pub fn create_current_version_file(db_path: &Path) -> io::Result<()> { + create_version_file(db_path, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH) +} + +pub fn create_version_file( + db_path: &Path, + major: &str, + minor: &str, + patch: &str, +) -> io::Result<()> { let version_path = db_path.join(VERSION_FILE_NAME); - fs::write(version_path, format!("{}.{}.{}", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)) + fs::write(version_path, format!("{}.{}.{}", major, minor, patch)) } /// Ensures Meilisearch version is compatible with the database, returns an error versions mismatch. pub fn check_version_file(db_path: &Path) -> anyhow::Result<()> { - let version_path = db_path.join(VERSION_FILE_NAME); + let (major, minor, patch) = get_version(db_path)?; - match fs::read_to_string(version_path) { - Ok(version) => { - let version_components = version.split('.').collect::>(); - let (major, minor, patch) = match &version_components[..] { - [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()), - _ => return Err(VersionFileError::MalformedVersionFile.into()), - }; - - if major != VERSION_MAJOR || minor != VERSION_MINOR { - return Err(VersionFileError::VersionMismatch { major, minor, patch }.into()); - } - } - Err(error) => { - return match error.kind() { - ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile.into()), - _ => Err(error.into()), - } - } + if major != VERSION_MAJOR || minor != VERSION_MINOR { + return Err(VersionFileError::VersionMismatch { major, minor, patch }.into()); } Ok(()) } +pub fn get_version(db_path: &Path) -> Result<(String, String, String), VersionFileError> { + let version_path = db_path.join(VERSION_FILE_NAME); + + match fs::read_to_string(version_path) { + Ok(version) => parse_version(&version), + Err(error) => match error.kind() { + ErrorKind::NotFound => Err(VersionFileError::MissingVersionFile), + _ => Err(error.into()), + }, + } +} + +pub fn parse_version(version: &str) -> Result<(String, String, String), VersionFileError> { + let version_components = version.split('.').collect::>(); + let (major, minor, patch) = match &version_components[..] { + [major, minor, patch] => (major.to_string(), minor.to_string(), patch.to_string()), + _ => return Err(VersionFileError::MalformedVersionFile), + }; + Ok((major, minor, patch)) +} + #[derive(thiserror::Error, Debug)] pub enum VersionFileError { #[error( @@ -58,4 +72,7 @@ pub enum VersionFileError { env!("CARGO_PKG_VERSION").to_string() )] VersionMismatch { major: String, minor: String, patch: String }, + + #[error(transparent)] + IoError(#[from] std::io::Error), } diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index e614ecc6a..2a16e1017 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"] khmer = ["meilisearch-types/khmer"] vietnamese = ["meilisearch-types/vietnamese"] swedish-recomposition = ["meilisearch-types/swedish-recomposition"] +german = ["meilisearch-types/german"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 07350d506..f8d6a0fdc 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -646,8 +646,6 @@ pub struct SearchAggregator { max_vector_size: usize, // Whether the semantic ratio passed to a hybrid search equals the default ratio. semantic_ratio: bool, - // Whether a non-default embedder was specified - embedder: bool, hybrid: bool, retrieve_vectors: bool, @@ -795,7 +793,6 @@ impl SearchAggregator { if let Some(hybrid) = hybrid { ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); - ret.embedder = hybrid.embedder.is_some(); ret.hybrid = true; } @@ -863,7 +860,6 @@ impl SearchAggregator { show_ranking_score, show_ranking_score_details, semantic_ratio, - embedder, hybrid, total_degraded, total_used_negative_operator, @@ -923,7 +919,6 @@ impl SearchAggregator { self.retrieve_vectors |= retrieve_vectors; self.semantic_ratio |= semantic_ratio; self.hybrid |= hybrid; - self.embedder |= embedder; // pagination self.max_limit = self.max_limit.max(max_limit); @@ -999,7 +994,6 @@ impl SearchAggregator { show_ranking_score, show_ranking_score_details, semantic_ratio, - embedder, hybrid, total_degraded, total_used_negative_operator, @@ -1051,7 +1045,6 @@ impl SearchAggregator { "hybrid": { "enabled": hybrid, "semantic_ratio": semantic_ratio, - "embedder": embedder, }, "pagination": { "max_limit": max_limit, @@ -1782,7 +1775,6 @@ pub struct SimilarAggregator { used_syntax: HashMap, // Whether a non-default embedder was specified - embedder: bool, retrieve_vectors: bool, // pagination @@ -1803,7 +1795,7 @@ impl SimilarAggregator { pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { let SimilarQuery { id: _, - embedder, + embedder: _, offset, limit, attributes_to_retrieve: _, @@ -1851,7 +1843,6 @@ impl SimilarAggregator { ret.show_ranking_score_details = *show_ranking_score_details; ret.ranking_score_threshold = ranking_score_threshold.is_some(); - ret.embedder = embedder.is_some(); ret.retrieve_vectors = *retrieve_vectors; ret @@ -1883,7 +1874,6 @@ impl SimilarAggregator { max_attributes_to_retrieve, show_ranking_score, show_ranking_score_details, - embedder, ranking_score_threshold, retrieve_vectors, } = other; @@ -1914,7 +1904,6 @@ impl SimilarAggregator { *used_syntax = used_syntax.saturating_add(value); } - self.embedder |= embedder; self.retrieve_vectors |= retrieve_vectors; // pagination @@ -1948,7 +1937,6 @@ impl SimilarAggregator { max_attributes_to_retrieve, show_ranking_score, show_ranking_score_details, - embedder, ranking_score_threshold, retrieve_vectors, } = self; @@ -1980,9 +1968,6 @@ impl SimilarAggregator { "vector": { "retrieve_vectors": retrieve_vectors, }, - "hybrid": { - "embedder": embedder, - }, "pagination": { "max_limit": max_limit, "max_offset": max_offset, diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 41473245e..5c4ce171f 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType}; use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; use meilisearch_types::error::{Code, ErrorCode, ResponseError}; use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; +use meilisearch_types::milli::OrderBy; use serde_json::Value; use tokio::task::JoinError; @@ -27,10 +28,20 @@ pub enum MeilisearchHttpError { EmptyFilter, #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] InvalidExpression(&'static [&'static str], Value), - #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")] + #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")] FederationOptionsInNonFederatedRequest(usize), - #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")] + #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")] PaginationInFederatedQuery(usize, &'static str), + #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")] + FacetsInFederatedQuery(usize, String, Vec), + #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")] + InconsistentFacetOrder { + facet: String, + previous_facet_order: OrderBy, + previous_uid: String, + index_facet_order: OrderBy, + current_uid: String, + }, #[error("A {0} payload is missing.")] MissingPayload(PayloadType), #[error("Too many search requests running at the same time: {0}. Retry after 10s.")] @@ -61,7 +72,7 @@ pub enum MeilisearchHttpError { DocumentFormat(#[from] DocumentFormatError), #[error(transparent)] Join(#[from] JoinError), - #[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")] + #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")] MissingSearchHybrid, } @@ -96,6 +107,10 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { Code::InvalidMultiSearchQueryPagination } + MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets, + MeilisearchHttpError::InconsistentFacetOrder { .. } => { + Code::InvalidMultiSearchFacetOrder + } } } } diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs index b33826141..b24f18fae 100644 --- a/meilisearch/src/lib.rs +++ b/meilisearch/src/lib.rs @@ -13,11 +13,10 @@ pub mod search_queue; use std::fs::File; use std::io::{BufReader, BufWriter}; -use std::num::NonZeroUsize; use std::path::Path; use std::str::FromStr; use std::sync::Arc; -use std::thread::{self, available_parallelism}; +use std::thread; use std::time::Duration; use actix_cors::Cors; @@ -37,7 +36,7 @@ use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchR use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; use meilisearch_types::settings::apply_settings_to_builder; use meilisearch_types::tasks::KindWithContent; -use meilisearch_types::versioning::{check_version_file, create_version_file}; +use meilisearch_types::versioning::{check_version_file, create_current_version_file}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; pub use option::Opt; use option::ScheduleSnapshot; @@ -118,6 +117,7 @@ pub type LogStderrType = tracing_subscriber::filter::Filtered< pub fn create_app( index_scheduler: Data, auth_controller: Data, + search_queue: Data, opt: Opt, logs: (LogRouteHandle, LogStderrHandle), analytics: Arc, @@ -137,6 +137,7 @@ pub fn create_app( s, index_scheduler.clone(), auth_controller.clone(), + search_queue.clone(), &opt, logs, analytics.clone(), @@ -318,7 +319,7 @@ fn open_or_create_database_unchecked( match ( index_scheduler_builder(), auth_controller.map_err(anyhow::Error::from), - create_version_file(&opt.db_path).map_err(anyhow::Error::from), + create_current_version_file(&opt.db_path).map_err(anyhow::Error::from), ) { (Ok(i), Ok(a), Ok(())) => Ok((i, a)), (Err(e), _, _) | (_, Err(e), _) | (_, _, Err(e)) => { @@ -469,19 +470,16 @@ pub fn configure_data( config: &mut web::ServiceConfig, index_scheduler: Data, auth: Data, + search_queue: Data, opt: &Opt, (logs_route, logs_stderr): (LogRouteHandle, LogStderrHandle), analytics: Arc, ) { - let search_queue = SearchQueue::new( - opt.experimental_search_queue_size, - available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()), - ); let http_payload_size_limit = opt.http_payload_size_limit.as_u64() as usize; config .app_data(index_scheduler) .app_data(auth) - .app_data(web::Data::new(search_queue)) + .app_data(search_queue) .app_data(web::Data::from(analytics)) .app_data(web::Data::new(logs_route)) .app_data(web::Data::new(logs_stderr)) diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs index 2e70b4eb7..b66bfc5b8 100644 --- a/meilisearch/src/main.rs +++ b/meilisearch/src/main.rs @@ -1,8 +1,10 @@ use std::env; use std::io::{stderr, LineWriter, Write}; +use std::num::NonZeroUsize; use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; +use std::thread::available_parallelism; use actix_web::http::KeepAlive; use actix_web::web::Data; @@ -11,6 +13,7 @@ use index_scheduler::IndexScheduler; use is_terminal::IsTerminal; use meilisearch::analytics::Analytics; use meilisearch::option::LogMode; +use meilisearch::search_queue::SearchQueue; use meilisearch::{ analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle, LogStderrType, Opt, SubscriberForSecondLayer, @@ -148,11 +151,17 @@ async fn run_http( let opt_clone = opt.clone(); let index_scheduler = Data::from(index_scheduler); let auth_controller = Data::from(auth_controller); + let search_queue = SearchQueue::new( + opt.experimental_search_queue_size, + available_parallelism().unwrap_or(NonZeroUsize::new(2).unwrap()), + ); + let search_queue = Data::new(search_queue); let http_server = HttpServer::new(move || { create_app( index_scheduler.clone(), auth_controller.clone(), + search_queue.clone(), opt.clone(), logs.clone(), analytics.clone(), diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index a648987ca..1df80711d 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -81,7 +81,7 @@ pub async fn search( let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?; - let _permit = search_queue.try_get_search_permit().await?; + let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_facet_search( &index, @@ -93,7 +93,9 @@ pub async fn search( locales, ) }) - .await?; + .await; + permit.drop().await; + let search_result = search_result?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index e60f95948..6a8eee521 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -128,8 +128,10 @@ impl std::ops::Deref for SemanticRatioGet { } } -impl From for SearchQuery { - fn from(other: SearchQueryGet) -> Self { +impl TryFrom for SearchQuery { + type Error = ResponseError; + + fn try_from(other: SearchQueryGet) -> Result { let filter = match other.filter { Some(f) => match serde_json::from_str(&f) { Ok(v) => Some(v), @@ -140,19 +142,28 @@ impl From for SearchQuery { let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) { (None, None) => None, - (None, Some(semantic_ratio)) => { - Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None }) + (None, Some(_)) => { + return Err(ResponseError::from_msg( + "`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(), + meilisearch_types::error::Code::InvalidHybridQuery, + )); + } + (Some(embedder), None) => { + Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder }) } - (Some(embedder), None) => Some(HybridQuery { - semantic_ratio: DEFAULT_SEMANTIC_RATIO(), - embedder: Some(embedder), - }), (Some(embedder), Some(semantic_ratio)) => { - Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) }) + Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder }) } }; - Self { + if other.vector.is_some() && hybrid.is_none() { + return Err(ResponseError::from_msg( + "`hybridEmbedder` is mandatory when `vector` is present".into(), + meilisearch_types::error::Code::MissingSearchHybrid, + )); + } + + Ok(Self { q: other.q, vector: other.vector.map(CS::into_inner), offset: other.offset.0, @@ -179,7 +190,7 @@ impl From for SearchQuery { hybrid, ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), locales: other.locales.map(|o| o.into_iter().collect()), - } + }) } } @@ -219,7 +230,7 @@ pub async fn search_with_url_query( debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; - let mut query: SearchQuery = params.into_inner().into(); + let mut query: SearchQuery = params.into_inner().try_into()?; // Tenant token search_rules. if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { @@ -233,11 +244,13 @@ pub async fn search_with_url_query( let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?; - let _permit = search_queue.try_get_search_permit().await?; + let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features()) }) - .await?; + .await; + permit.drop().await; + let search_result = search_result?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); } @@ -276,11 +289,13 @@ pub async fn search_with_post( let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; - let _permit = search_queue.try_get_search_permit().await?; + let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features()) }) - .await?; + .await; + permit.drop().await; + let search_result = search_result?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); if search_result.degraded { @@ -308,44 +323,36 @@ pub fn search_kind( features.check_vector("Passing `hybrid` as a parameter")?; } - // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing - if query.vector.is_none() { - match &query.q { - Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly), - None => return Ok(SearchKind::KeywordOnly), - _ => {} + // handle with care, the order of cases matters, the semantics is subtle + match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) { + // empty query, no vector => placeholder search + (Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly), + // no query, no vector => placeholder search + (None, _, None) => Ok(SearchKind::KeywordOnly), + // hybrid.semantic_ratio == 1.0 => vector + (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => { + SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len())) } - } - - match &query.hybrid { - Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => { - Ok(SearchKind::semantic( - index_scheduler, - index, - embedder.as_deref(), - query.vector.as_ref().map(Vec::len), - )?) - } - Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => { + // hybrid.semantic_ratio == 0.0 => keyword + (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => { Ok(SearchKind::KeywordOnly) } - Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid( + // no query, hybrid, vector => semantic + (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => { + SearchKind::semantic(index_scheduler, index, embedder, Some(v.len())) + } + // query, no hybrid, no vector => keyword + (Some(_), None, None) => Ok(SearchKind::KeywordOnly), + // query, hybrid, maybe vector => hybrid + (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid( index_scheduler, index, - embedder.as_deref(), + embedder, **semantic_ratio, - query.vector.as_ref().map(Vec::len), - )?), - None => match (query.q.as_deref(), query.vector.as_deref()) { - (_query, None) => Ok(SearchKind::KeywordOnly), - (None, Some(_vector)) => Ok(SearchKind::semantic( - index_scheduler, - index, - None, - query.vector.as_ref().map(Vec::len), - )?), - (Some(_), Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()), - }, + v.map(|v| v.len()), + ), + + (_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()), } } diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index e95a75f69..aaf8673d0 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -636,11 +636,26 @@ fn embedder_analytics( .any(|config| config.document_template.set().is_some()) }); + let document_template_max_bytes = setting.as_ref().and_then(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .filter_map(|config| config.document_template_max_bytes.set()) + .max() + }); + + let binary_quantization_used = setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.binary_quantized.set().is_some()) + }); + json!( { "total": setting.as_ref().map(|s| s.len()), "sources": sources, "document_template_used": document_template_used, + "document_template_max_bytes": document_template_max_bytes, + "binary_quantization_used": binary_quantization_used, } ) } diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index 5027a473e..f94a02987 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -102,8 +102,8 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; - let (embedder_name, embedder) = - SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; + let (embedder_name, embedder, quantized) = + SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; tokio::task::spawn_blocking(move || { perform_similar( @@ -111,6 +111,7 @@ async fn similar( query, embedder_name, embedder, + quantized, retrieve_vectors, index_scheduler.features(), ) @@ -139,8 +140,8 @@ pub struct SimilarQueryGet { show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError, default)] pub ranking_score_threshold: Option, - #[deserr(default, error = DeserrQueryParamError)] - pub embedder: Option, + #[deserr(error = DeserrQueryParamError)] + pub embedder: String, } #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index b8822488f..5fcb868c6 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -39,7 +39,7 @@ pub async fn multi_search_with_post( ) -> Result { // Since we don't want to process half of the search requests and then get a permit refused // we're going to get one permit for the whole duration of the multi-search request. - let _permit = search_queue.try_get_search_permit().await?; + let permit = search_queue.try_get_search_permit().await?; let federated_search = params.into_inner(); @@ -81,6 +81,7 @@ pub async fn multi_search_with_post( perform_federated_search(&index_scheduler, queries, federation, features) }) .await; + permit.drop().await; if let Ok(Ok(_)) = search_result { multi_aggregate.succeed(); @@ -143,6 +144,7 @@ pub async fn multi_search_with_post( Ok(search_results) } .await; + permit.drop().await; if search_results.is_ok() { multi_aggregate.succeed(); diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs index 3dc6520af..02f009ff7 100644 --- a/meilisearch/src/routes/tasks.rs +++ b/meilisearch/src/routes/tasks.rs @@ -616,7 +616,7 @@ mod tests { let err = deserr_query_params::(params).unwrap_err(); snapshot!(meili_snap::json_string!(err), @r###" { - "message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids[1]`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -628,7 +628,7 @@ mod tests { let err = deserr_query_params::(params).unwrap_err(); snapshot!(meili_snap::json_string!(err), @r###" { - "message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 58005ec53..5279c26bb 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -9,20 +9,24 @@ use std::vec::{IntoIter, Vec}; use actix_http::StatusCode; use index_scheduler::{IndexScheduler, RoFeatures}; +use indexmap::IndexMap; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::{ - InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, + InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, + InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit, + InvalidSearchOffset, }; use meilisearch_types::error::ResponseError; +use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; -use meilisearch_types::milli::{self, DocumentId, TimeBudget}; +use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget}; use roaring::RoaringBitmap; use serde::Serialize; use super::ranking_rules::{self, RankingRules}; use super::{ - prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, - SearchQuery, SearchQueryWithIndex, + compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats, + HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, }; use crate::error::MeilisearchHttpError; use crate::routes::indexes::search::search_kind; @@ -73,6 +77,17 @@ pub struct Federation { pub limit: usize, #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] pub offset: usize, + #[deserr(default, error = DeserrJsonError)] + pub facets_by_index: BTreeMap>>, + #[deserr(default, error = DeserrJsonError)] + pub merge_facets: Option, +} + +#[derive(Copy, Clone, Debug, deserr::Deserr, Default)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct MergeFacets { + #[deserr(default, error = DeserrJsonError)] + pub max_values_per_facet: Option, } #[derive(Debug, deserr::Deserr)] @@ -82,7 +97,7 @@ pub struct FederatedSearch { #[deserr(default)] pub federation: Option, } -#[derive(Serialize, Clone, PartialEq)] +#[derive(Serialize, Clone)] #[serde(rename_all = "camelCase")] pub struct FederatedSearchResult { pub hits: Vec, @@ -93,6 +108,13 @@ pub struct FederatedSearchResult { #[serde(skip_serializing_if = "Option::is_none")] pub semantic_hit_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_distribution: Option>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_stats: Option>, + #[serde(skip_serializing_if = "FederatedFacets::is_empty")] + pub facets_by_index: FederatedFacets, + // These fields are only used for analytics purposes #[serde(skip)] pub degraded: bool, @@ -109,6 +131,9 @@ impl fmt::Debug for FederatedSearchResult { semantic_hit_count, degraded, used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, } = self; let mut debug = f.debug_struct("SearchResult"); @@ -122,9 +147,18 @@ impl fmt::Debug for FederatedSearchResult { if *degraded { debug.field("degraded", degraded); } + if let Some(facet_distribution) = facet_distribution { + debug.field("facet_distribution", &facet_distribution); + } + if let Some(facet_stats) = facet_stats { + debug.field("facet_stats", &facet_stats); + } if let Some(semantic_hit_count) = semantic_hit_count { debug.field("semantic_hit_count", &semantic_hit_count); } + if !facets_by_index.is_empty() { + debug.field("facets_by_index", &facets_by_index); + } debug.finish() } @@ -313,16 +347,104 @@ struct SearchHitByIndex { } struct SearchResultByIndex { + index: String, hits: Vec, - candidates: RoaringBitmap, + estimated_total_hits: usize, degraded: bool, used_negative_operator: bool, + facets: Option, +} + +#[derive(Debug, Clone, Default, Serialize)] +pub struct FederatedFacets(pub BTreeMap); + +impl FederatedFacets { + pub fn insert(&mut self, index: String, facets: Option) { + if let Some(facets) = facets { + self.0.insert(index, facets); + } + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn merge( + self, + MergeFacets { max_values_per_facet }: MergeFacets, + facet_order: BTreeMap, + ) -> Option { + if self.is_empty() { + return None; + } + + let mut distribution: BTreeMap = Default::default(); + let mut stats: BTreeMap = Default::default(); + + for facets_by_index in self.0.into_values() { + for (facet, index_distribution) in facets_by_index.distribution { + match distribution.entry(facet) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(index_distribution); + } + std::collections::btree_map::Entry::Occupied(mut entry) => { + let distribution = entry.get_mut(); + + for (value, index_count) in index_distribution { + distribution + .entry(value) + .and_modify(|count| *count += index_count) + .or_insert(index_count); + } + } + } + } + + for (facet, index_stats) in facets_by_index.stats { + match stats.entry(facet) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(index_stats); + } + std::collections::btree_map::Entry::Occupied(mut entry) => { + let stats = entry.get_mut(); + + stats.min = f64::min(stats.min, index_stats.min); + stats.max = f64::max(stats.max, index_stats.max); + } + } + } + } + + // fixup order + for (facet, values) in &mut distribution { + let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); + + match order_by { + OrderBy::Lexicographic => { + values.sort_unstable_by(|left, _, right, _| left.cmp(right)) + } + OrderBy::Count => { + values.sort_unstable_by(|_, left, _, right| { + left.cmp(right) + // biggest first + .reverse() + }) + } + } + + if let Some(max_values_per_facet) = max_values_per_facet { + values.truncate(max_values_per_facet) + }; + } + + Some(ComputedFacets { distribution, stats }) + } } pub fn perform_federated_search( index_scheduler: &IndexScheduler, queries: Vec, - federation: Federation, + mut federation: Federation, features: RoFeatures, ) -> Result { let before_search = std::time::Instant::now(); @@ -342,6 +464,16 @@ pub fn perform_federated_search( .into()); } + if let Some(facets) = federated_query.has_facets() { + let facets = facets.to_owned(); + return Err(MeilisearchHttpError::FacetsInFederatedQuery( + query_index, + federated_query.index_uid.into_inner(), + facets, + ) + .into()); + } + let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { @@ -353,13 +485,24 @@ pub fn perform_federated_search( // 2. perform queries, merge and make hits index by index let required_hit_count = federation.limit + federation.offset; + // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic // Then in step (3), we'll update its value if there is any semantic search let mut semantic_hit_count = None; let mut results_by_index = Vec::with_capacity(queries_by_index.len()); let mut previous_query_data: Option<(RankingRules, usize, String)> = None; + // remember the order and name of first index for each facet when merging with index settings + // to detect if the order is inconsistent for a facet. + let mut facet_order: Option> = match federation.merge_facets + { + Some(MergeFacets { .. }) => Some(Default::default()), + _ => None, + }; + for (index_uid, queries) in queries_by_index { + let first_query_index = queries.first().map(|query| query.query_index); + let index = match index_scheduler.index(&index_uid) { Ok(index) => index, Err(err) => { @@ -367,9 +510,8 @@ pub fn perform_federated_search( // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but // here the resource not found is not part of the URL. err.code = StatusCode::BAD_REQUEST; - if let Some(query) = queries.first() { - err.message = - format!("Inside `.queries[{}]`: {}", query.query_index, err.message); + if let Some(query_index) = first_query_index { + err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message); } return Err(err); } @@ -394,6 +536,23 @@ pub fn perform_federated_search( let mut used_negative_operator = false; let mut candidates = RoaringBitmap::new(); + let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten(); + + // TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries + if let Err(mut error) = + check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", + if let Some(query_index) = first_query_index { + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + return Err(error); + } + // 2.1. Compute all candidates for each query in the index let mut results_by_query = Vec::with_capacity(queries.len()); @@ -562,34 +721,116 @@ pub fn perform_federated_search( .collect(); let merged_result = merged_result?; + + let estimated_total_hits = candidates.len() as usize; + + let facets = facets_by_index + .map(|facets_by_index| { + compute_facet_distribution_stats( + &facets_by_index, + &index, + &rtxn, + candidates, + super::Route::MultiSearch, + ) + }) + .transpose() + .map_err(|mut error| { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", + error.message, + if let Some(query_index) = first_query_index { + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + error + })?; + results_by_index.push(SearchResultByIndex { + index: index_uid, hits: merged_result, - candidates, + estimated_total_hits, degraded, used_negative_operator, + facets, }); } + // bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index. + for (index_uid, facets) in federation.facets_by_index { + let index = match index_scheduler.index(&index_uid) { + Ok(index) => index, + Err(err) => { + let mut err = ResponseError::from(err); + // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but + // here the resource not found is not part of the URL. + err.code = StatusCode::BAD_REQUEST; + err.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", + err.message + ); + return Err(err); + } + }; + + // Important: this is the only transaction we'll use for this index during this federated search + let rtxn = index.read_txn()?; + + if let Err(mut error) = + check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries", + ); + return Err(error); + } + + if let Some(facets) = facets { + if let Err(mut error) = compute_facet_distribution_stats( + &facets, + &index, + &rtxn, + Default::default(), + super::Route::MultiSearch, + ) { + error.message = + format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message); + return Err(error); + } + } + } + // 3. merge hits and metadata across indexes // 3.1 merge metadata - let (estimated_total_hits, degraded, used_negative_operator) = { + let (estimated_total_hits, degraded, used_negative_operator, facets) = { let mut estimated_total_hits = 0; let mut degraded = false; let mut used_negative_operator = false; + let mut facets: FederatedFacets = FederatedFacets::default(); + for SearchResultByIndex { + index, hits: _, - candidates, + estimated_total_hits: estimated_total_hits_by_index, + facets: facets_by_index, degraded: degraded_by_index, used_negative_operator: used_negative_operator_by_index, - } in &results_by_index + } in &mut results_by_index { - estimated_total_hits += candidates.len() as usize; + estimated_total_hits += *estimated_total_hits_by_index; degraded |= *degraded_by_index; used_negative_operator |= *used_negative_operator_by_index; + + let facets_by_index = std::mem::take(facets_by_index); + let index = std::mem::take(index); + + facets.insert(index, facets_by_index); } - (estimated_total_hits, degraded, used_negative_operator) + (estimated_total_hits, degraded, used_negative_operator, facets) }; // 3.2 merge hits @@ -606,6 +847,20 @@ pub fn perform_federated_search( .map(|hit| hit.hit) .collect(); + let (facet_distribution, facet_stats, facets_by_index) = + match federation.merge_facets.zip(facet_order) { + Some((merge_facets, facet_order)) => { + let facets = facets.merge(merge_facets, facet_order); + + let (facet_distribution, facet_stats) = facets + .map(|ComputedFacets { distribution, stats }| (distribution, stats)) + .unzip(); + + (facet_distribution, facet_stats, FederatedFacets::default()) + } + None => (None, None, facets), + }; + let search_result = FederatedSearchResult { hits: merged_hits, processing_time_ms: before_search.elapsed().as_millis(), @@ -617,7 +872,39 @@ pub fn perform_federated_search( semantic_hit_count, degraded, used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, }; Ok(search_result) } + +fn check_facet_order( + facet_order: &mut Option>, + current_index: &str, + facets_by_index: &Option>, + index: &milli::Index, + rtxn: &milli::heed::RoTxn<'_>, +) -> Result<(), ResponseError> { + if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) { + let index_facet_order = index.sort_facet_values_by(rtxn)?; + for facet in facets_by_index { + let index_facet_order = index_facet_order.get(facet); + let (previous_index, previous_facet_order) = facet_order + .entry(facet.to_owned()) + .or_insert_with(|| (current_index.to_owned(), index_facet_order)); + if previous_facet_order != &index_facet_order { + return Err(MeilisearchHttpError::InconsistentFacetOrder { + facet: facet.clone(), + previous_facet_order: *previous_facet_order, + previous_uid: previous_index.clone(), + current_uid: current_index.to_owned(), + index_facet_order, + } + .into()); + } + } + }; + Ok(()) +} diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 915505be0..66b6e56de 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -267,58 +267,54 @@ impl fmt::Debug for SearchQuery { pub struct HybridQuery { #[deserr(default, error = DeserrJsonError, default)] pub semantic_ratio: SemanticRatio, - #[deserr(default, error = DeserrJsonError, default)] - pub embedder: Option, + #[deserr(error = DeserrJsonError)] + pub embedder: String, } #[derive(Clone)] pub enum SearchKind { KeywordOnly, - SemanticOnly { embedder_name: String, embedder: Arc }, - Hybrid { embedder_name: String, embedder: Arc, semantic_ratio: f32 }, + SemanticOnly { embedder_name: String, embedder: Arc, quantized: bool }, + Hybrid { embedder_name: String, embedder: Arc, quantized: bool, semantic_ratio: f32 }, } impl SearchKind { pub(crate) fn semantic( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, vector_len: Option, ) -> Result { - let (embedder_name, embedder) = + let (embedder_name, embedder, quantized) = Self::embedder(index_scheduler, index, embedder_name, vector_len)?; - Ok(Self::SemanticOnly { embedder_name, embedder }) + Ok(Self::SemanticOnly { embedder_name, embedder, quantized }) } pub(crate) fn hybrid( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, semantic_ratio: f32, vector_len: Option, ) -> Result { - let (embedder_name, embedder) = + let (embedder_name, embedder, quantized) = Self::embedder(index_scheduler, index, embedder_name, vector_len)?; - Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio }) + Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio }) } pub(crate) fn embedder( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, vector_len: Option, - ) -> Result<(String, Arc), ResponseError> { + ) -> Result<(String, Arc, bool), ResponseError> { let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let embedders = index_scheduler.embedders(embedder_configs)?; - let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name()); - - let embedder = embedders.get(embedder_name); - - let embedder = embedder + let (embedder, _, quantized) = embedders + .get(embedder_name) .ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned())) - .map_err(milli::Error::from)? - .0; + .map_err(milli::Error::from)?; if let Some(vector_len) = vector_len { if vector_len != embedder.dimensions() { @@ -332,7 +328,7 @@ impl SearchKind { } } - Ok((embedder_name.to_owned(), embedder)) + Ok((embedder_name.to_owned(), embedder, quantized)) } } @@ -441,9 +437,6 @@ pub struct SearchQueryWithIndex { } impl SearchQueryWithIndex { - pub fn has_federation_options(&self) -> bool { - self.federation_options.is_some() - } pub fn has_pagination(&self) -> Option<&'static str> { if self.offset.is_some() { Some("offset") @@ -458,6 +451,10 @@ impl SearchQueryWithIndex { } } + pub fn has_facets(&self) -> Option<&[String]> { + self.facets.as_deref().filter(|v| !v.is_empty()) + } + pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option) { let SearchQueryWithIndex { index_uid, @@ -537,8 +534,8 @@ pub struct SimilarQuery { pub limit: usize, #[deserr(default, error = DeserrJsonError)] pub filter: Option, - #[deserr(default, error = DeserrJsonError, default)] - pub embedder: Option, + #[deserr(error = DeserrJsonError)] + pub embedder: String, #[deserr(default, error = DeserrJsonError)] pub attributes_to_retrieve: Option>, #[deserr(default, error = DeserrJsonError)] @@ -792,7 +789,7 @@ fn prepare_search<'t>( search.query(q); } } - SearchKind::SemanticOnly { embedder_name, embedder } => { + SearchKind::SemanticOnly { embedder_name, embedder, quantized } => { let vector = match query.vector.clone() { Some(vector) => vector, None => { @@ -806,14 +803,19 @@ fn prepare_search<'t>( } }; - search.semantic(embedder_name.clone(), embedder.clone(), Some(vector)); + search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector)); } - SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => { + SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => { if let Some(q) = &query.q { search.query(q); } // will be embedded in hybrid search if necessary - search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone()); + search.semantic( + embedder_name.clone(), + embedder.clone(), + *quantized, + query.vector.clone(), + ); } } @@ -987,39 +989,13 @@ pub fn perform_search( HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits } }; - let (facet_distribution, facet_stats) = match facets { - Some(ref fields) => { - let mut facet_distribution = index.facets_distribution(&rtxn); - - let max_values_by_facet = index - .max_values_per_facet(&rtxn) - .map_err(milli::Error::from)? - .map(|x| x as usize) - .unwrap_or(DEFAULT_VALUES_PER_FACET); - facet_distribution.max_values_per_facet(max_values_by_facet); - - let sort_facet_values_by = - index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; - - if fields.iter().all(|f| f != "*") { - let fields: Vec<_> = - fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); - facet_distribution.facets(fields); - } - - let distribution = facet_distribution - .candidates(candidates) - .default_order_by(sort_facet_values_by.get("*")) - .execute()?; - let stats = facet_distribution.compute_stats()?; - (Some(distribution), Some(stats)) - } - None => (None, None), - }; - - let facet_stats = facet_stats.map(|stats| { - stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect() - }); + let (facet_distribution, facet_stats) = facets + .map(move |facets| { + compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search) + }) + .transpose()? + .map(|ComputedFacets { distribution, stats }| (distribution, stats)) + .unzip(); let result = SearchResult { hits: documents, @@ -1035,6 +1011,61 @@ pub fn perform_search( Ok(result) } +#[derive(Debug, Clone, Default, Serialize)] +pub struct ComputedFacets { + pub distribution: BTreeMap>, + pub stats: BTreeMap, +} + +enum Route { + Search, + MultiSearch, +} + +fn compute_facet_distribution_stats>( + facets: &[S], + index: &Index, + rtxn: &RoTxn, + candidates: roaring::RoaringBitmap, + route: Route, +) -> Result { + let mut facet_distribution = index.facets_distribution(rtxn); + + let max_values_by_facet = index + .max_values_per_facet(rtxn) + .map_err(milli::Error::from)? + .map(|x| x as usize) + .unwrap_or(DEFAULT_VALUES_PER_FACET); + + facet_distribution.max_values_per_facet(max_values_by_facet); + + let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?; + + // add specific facet if there is no placeholder + if facets.iter().all(|f| f.as_ref() != "*") { + let fields: Vec<_> = + facets.iter().map(|n| (n, sort_facet_values_by.get(n.as_ref()))).collect(); + facet_distribution.facets(fields); + } + + let distribution = facet_distribution + .candidates(candidates) + .default_order_by(sort_facet_values_by.get("*")) + .execute() + .map_err(|error| match (error, route) { + ( + error @ milli::Error::UserError(milli::UserError::InvalidFacetsDistribution { + .. + }), + Route::MultiSearch, + ) => ResponseError::from_msg(error.to_string(), Code::InvalidMultiSearchFacets), + (error, _) => error.into(), + })?; + let stats = facet_distribution.compute_stats()?; + let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect(); + Ok(ComputedFacets { distribution, stats }) +} + pub fn search_from_kind( search_kind: SearchKind, search: milli::Search<'_>, @@ -1413,6 +1444,7 @@ pub fn perform_similar( query: SimilarQuery, embedder_name: String, embedder: Arc, + quantized: bool, retrieve_vectors: RetrieveVectors, features: RoFeatures, ) -> Result { @@ -1441,8 +1473,16 @@ pub fn perform_similar( )); }; - let mut similar = - milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder); + let mut similar = milli::Similar::new( + internal_id, + offset, + limit, + index, + &rtxn, + embedder_name, + embedder, + quantized, + ); if let Some(ref filter) = query.filter { if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? { diff --git a/meilisearch/src/search_queue.rs b/meilisearch/src/search_queue.rs index 4f6dccc42..195fa1b6f 100644 --- a/meilisearch/src/search_queue.rs +++ b/meilisearch/src/search_queue.rs @@ -18,6 +18,7 @@ //! And should drop the Permit only once you have freed all the RAM consumed by the method. use std::num::NonZeroUsize; +use std::time::Duration; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -29,16 +30,31 @@ use crate::error::MeilisearchHttpError; pub struct SearchQueue { sender: mpsc::Sender>, capacity: usize, + /// If we have waited longer than this to get a permit, we should abort the search request entirely. + /// The client probably already closed the connection, but we have no way to find out. + time_to_abort: Duration, } /// You should only run search requests while holding this permit. /// Once it's dropped, a new search request will be able to process. +/// You should always try to drop the permit yourself calling the `drop` async method on it. #[derive(Debug)] pub struct Permit { sender: mpsc::Sender<()>, } +impl Permit { + /// Drop the permit giving back on permit to the search queue. + pub async fn drop(self) { + // if the channel is closed then the whole instance is down + let _ = self.sender.send(()).await; + } +} + impl Drop for Permit { + /// The implicit drop implementation can still be called in multiple cases: + /// - We forgot to call the explicit one somewhere => this should be fixed on our side asap + /// - The future is cancelled while running and the permit dropped with it fn drop(&mut self) { let sender = self.sender.clone(); // if the channel is closed then the whole instance is down @@ -53,7 +69,11 @@ impl SearchQueue { let (sender, receiver) = mpsc::channel(1); tokio::task::spawn(Self::run(capacity, paralellism, receiver)); - Self { sender, capacity } + Self { sender, capacity, time_to_abort: Duration::from_secs(60) } + } + + pub fn with_time_to_abort(self, time_to_abort: Duration) -> Self { + Self { time_to_abort, ..self } } /// This function is the main loop, it's in charge on scheduling which search request should execute first and @@ -119,9 +139,23 @@ impl SearchQueue { /// Returns a search `Permit`. /// It should be dropped as soon as you've freed all the RAM associated with the search request being processed. pub async fn try_get_search_permit(&self) -> Result { + let now = std::time::Instant::now(); let (sender, receiver) = oneshot::channel(); self.sender.send(sender).await.map_err(|_| MeilisearchHttpError::SearchLimiterIsDown)?; - receiver.await.map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity)) + let permit = receiver + .await + .map_err(|_| MeilisearchHttpError::TooManySearchRequests(self.capacity))?; + + // If we've been for more than one minute to get a search permit, it's better to simply + // abort the search request than spending time processing something were the client + // most certainly exited or got a timeout a long time ago. + // We may find a better solution in https://github.com/actix/actix-web/issues/3462. + if now.elapsed() > self.time_to_abort { + permit.drop().await; + Err(MeilisearchHttpError::TooManySearchRequests(self.capacity)) + } else { + Ok(permit) + } } /// Returns `Ok(())` if everything seems normal. diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 381bd1cb4..784067c2d 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -272,6 +272,20 @@ impl<'a> Index<'a, Shared> { } (task, code) } + + pub async fn delete_index_fail(&self) -> (Value, StatusCode) { + let (mut task, code) = self._delete().await; + if code.is_success() { + task = self.wait_task(task.uid()).await; + if task.is_success() { + panic!( + "`delete_index_fail` succeeded: {}", + serde_json::to_string_pretty(&task).unwrap() + ); + } + } + (task, code) + } } #[allow(dead_code)] @@ -314,6 +328,12 @@ impl Index<'_, State> { }); self.service.post_encoded("/indexes", body, self.encoder).await } + + pub(super) async fn _delete(&self) -> (Value, StatusCode) { + let url = format!("/indexes/{}", urlencode(self.uid.as_ref())); + self.service.delete(url).await + } + pub async fn wait_task(&self, update_id: u64) -> Value { // try several times to get status, or panic to not wait forever let url = format!("/tasks/{}", update_id); diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index ab3717e22..6d331ebbc 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -11,13 +11,11 @@ use actix_web::http::StatusCode; use byte_unit::{Byte, Unit}; use clap::Parser; use meilisearch::option::{IndexerOpts, MaxMemory, MaxThreads, Opt}; -use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer}; +use meilisearch::setup_meilisearch; use once_cell::sync::Lazy; use tempfile::TempDir; use tokio::sync::OnceCell; use tokio::time::sleep; -use tracing::level_filters::LevelFilter; -use tracing_subscriber::Layer; use uuid::Uuid; use super::index::Index; @@ -183,7 +181,7 @@ impl Server { let options = default_settings(dir.path()); let (index_scheduler, auth) = setup_meilisearch(&options).unwrap(); - let service = Service { index_scheduler, auth, options, api_key: None }; + let service = Service { index_scheduler, auth, api_key: None, options }; Server { service, _dir: Some(dir), _marker: PhantomData } } @@ -263,28 +261,7 @@ impl Server { Response = ServiceResponse, Error = actix_web::Error, > { - let (_route_layer, route_layer_handle) = - tracing_subscriber::reload::Layer::new(None.with_filter( - tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), - )); - let (_stderr_layer, stderr_layer_handle) = tracing_subscriber::reload::Layer::new( - (Box::new( - tracing_subscriber::fmt::layer() - .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE), - ) - as Box + Send + Sync>) - .with_filter(tracing_subscriber::filter::Targets::new()), - ); - - actix_web::test::init_service(create_app( - self.service.index_scheduler.clone().into(), - self.service.auth.clone().into(), - self.service.options.clone(), - (route_layer_handle, stderr_layer_handle), - analytics::MockAnalytics::new(&self.service.options), - true, - )) - .await + self.service.init_web_app().await } pub async fn list_api_keys(&self, params: &str) -> (Value, StatusCode) { diff --git a/meilisearch/tests/common/service.rs b/meilisearch/tests/common/service.rs index cd78253aa..8addbacf8 100644 --- a/meilisearch/tests/common/service.rs +++ b/meilisearch/tests/common/service.rs @@ -1,10 +1,15 @@ +use std::num::NonZeroUsize; use std::sync::Arc; +use actix_web::body::MessageBody; +use actix_web::dev::ServiceResponse; use actix_web::http::header::ContentType; use actix_web::http::StatusCode; use actix_web::test; use actix_web::test::TestRequest; +use actix_web::web::Data; use index_scheduler::IndexScheduler; +use meilisearch::search_queue::SearchQueue; use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; use meilisearch_auth::AuthController; use tracing::level_filters::LevelFilter; @@ -106,7 +111,13 @@ impl Service { self.request(req).await } - pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { + pub async fn init_web_app( + &self, + ) -> impl actix_web::dev::Service< + actix_http::Request, + Response = ServiceResponse, + Error = actix_web::Error, + > { let (_route_layer, route_layer_handle) = tracing_subscriber::reload::Layer::new(None.with_filter( tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF), @@ -119,16 +130,25 @@ impl Service { as Box + Send + Sync>) .with_filter(tracing_subscriber::filter::Targets::new()), ); + let search_queue = SearchQueue::new( + self.options.experimental_search_queue_size, + NonZeroUsize::new(1).unwrap(), + ); - let app = test::init_service(create_app( + actix_web::test::init_service(create_app( self.index_scheduler.clone().into(), self.auth.clone().into(), + Data::new(search_queue), self.options.clone(), (route_layer_handle, stderr_layer_handle), analytics::MockAnalytics::new(&self.options), true, )) - .await; + .await + } + + pub async fn request(&self, mut req: test::TestRequest) -> (Value, StatusCode) { + let app = self.init_web_app().await; if let Some(api_key) = &self.api_key { req = req.insert_header(("Authorization", ["Bearer ", api_key].concat())); diff --git a/meilisearch/tests/content_type.rs b/meilisearch/tests/content_type.rs index 0fc5b26ac..5ef8a4dce 100644 --- a/meilisearch/tests/content_type.rs +++ b/meilisearch/tests/content_type.rs @@ -6,6 +6,7 @@ use actix_web::test; use crate::common::{Server, Value}; +#[derive(Debug)] enum HttpVerb { Put, Patch, @@ -80,7 +81,7 @@ async fn error_json_bad_content_type() { let status_code = res.status(); let body = test::read_body(res).await; let response: Value = serde_json::from_slice(&body).unwrap_or_default(); - assert_eq!(status_code, 415, "calling the route `{}` without content-type is supposed to throw a bad media type error", route); + assert_eq!(status_code, 415, "calling the route `{verb:?} {route}` without content-type is supposed to throw a bad media type error:\n{}", String::from_utf8_lossy(&body)); assert_eq!( response, json!({ diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 819b2ddc2..c37b3a5e3 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -1023,7 +1023,7 @@ async fn error_document_add_create_index_bad_uid() { snapshot!(json_string!(response), @r###" { - "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -1280,7 +1280,7 @@ async fn error_add_documents_bad_document_id() { "indexedDocuments": 0 }, "error": { - "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", + "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_document_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_id" diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 280073f51..4c644ae98 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -136,7 +136,7 @@ async fn get_all_documents_bad_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -525,7 +525,7 @@ async fn delete_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -723,7 +723,7 @@ async fn fetch_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" diff --git a/meilisearch/tests/documents/update_documents.rs b/meilisearch/tests/documents/update_documents.rs index a5d466513..195dca914 100644 --- a/meilisearch/tests/documents/update_documents.rs +++ b/meilisearch/tests/documents/update_documents.rs @@ -11,7 +11,7 @@ async fn error_document_update_create_index_bad_uid() { let (response, code) = index.update_documents(json!([{"id": 1}]), None).await; let expected_response = json!({ - "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`883 fj!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -173,7 +173,7 @@ async fn error_update_documents_bad_document_id() { assert_eq!( response["error"]["message"], json!( - r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_)."# + r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."# ) ); assert_eq!(response["error"]["code"], json!("invalid_document_id")); diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index ea98e200f..1c1a8e649 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -2097,7 +2097,8 @@ async fn generate_and_import_dump_containing_vectors() { "source": "huggingFace", "model": "sentence-transformers/all-MiniLM-L6-v2", "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", - "documentTemplate": "{{doc.doggo}}" + "documentTemplate": "{{doc.doggo}}", + "documentTemplateMaxBytes": 400 } }, "searchCutoffMs": null, diff --git a/meilisearch/tests/index/create_index.rs b/meilisearch/tests/index/create_index.rs index b51ccab51..e99244722 100644 --- a/meilisearch/tests/index/create_index.rs +++ b/meilisearch/tests/index/create_index.rs @@ -125,11 +125,11 @@ async fn create_index_with_primary_key() { #[actix_rt::test] async fn create_index_with_invalid_primary_key() { - let document = json!([ { "id": 2, "title": "Pride and Prejudice" } ]); + let documents = json!([ { "id": 2, "title": "Pride and Prejudice" } ]); let server = Server::new().await; let index = server.index("movies"); - let (_response, code) = index.add_documents(document, Some("title")).await; + let (_response, code) = index.add_documents(documents, Some("title")).await; assert_eq!(code, 202); index.wait_task(0).await; @@ -137,6 +137,17 @@ async fn create_index_with_invalid_primary_key() { let (response, code) = index.get().await; assert_eq!(code, 200); assert_eq!(response["primaryKey"], json!(null)); + + let documents = json!([ { "id": "e".repeat(513) } ]); + + let (_response, code) = index.add_documents(documents, Some("id")).await; + assert_eq!(code, 202); + + index.wait_task(1).await; + + let (response, code) = index.get().await; + assert_eq!(code, 200); + assert_eq!(response["primaryKey"], json!(null)); } #[actix_rt::test] @@ -192,7 +203,7 @@ async fn error_create_with_invalid_index_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.uid`: `test test#!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value at `.uid`: `test test#!` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/index/delete_index.rs b/meilisearch/tests/index/delete_index.rs index e404a6003..03185d21a 100644 --- a/meilisearch/tests/index/delete_index.rs +++ b/meilisearch/tests/index/delete_index.rs @@ -1,51 +1,50 @@ -use crate::common::Server; +use crate::common::{shared_does_not_exists_index, Server}; use crate::json; #[actix_rt::test] async fn create_and_delete_index() { - let server = Server::new().await; - let index = server.index("test"); - let (_response, code) = index.create(None).await; + let server = Server::new_shared(); + let index = server.unique_index(); + let (response, code) = index.create(None).await; assert_eq!(code, 202); - index.wait_task(0).await; + index.wait_task(response.uid()).await.succeeded(); assert_eq!(index.get().await.1, 200); - let (_response, code) = index.delete().await; + let (response, code) = index.delete().await; assert_eq!(code, 202); - index.wait_task(1).await; + index.wait_task(response.uid()).await.succeeded(); assert_eq!(index.get().await.1, 404); } #[actix_rt::test] async fn error_delete_unexisting_index() { - let server = Server::new().await; - let index = server.index("test"); - let (_, code) = index.delete().await; + let index = shared_does_not_exists_index().await; + let (task, code) = index.delete_index_fail().await; assert_eq!(code, 202); let expected_response = json!({ - "message": "Index `test` not found.", + "message": "Index `DOES_NOT_EXISTS` not found.", "code": "index_not_found", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#index_not_found" }); - let response = index.wait_task(0).await; + let response = index.wait_task(task.uid()).await; assert_eq!(response["status"], "failed"); assert_eq!(response["error"], expected_response); } #[actix_rt::test] async fn loop_delete_add_documents() { - let server = Server::new().await; - let index = server.index("test"); + let server = Server::new_shared(); + let index = server.unique_index(); let documents = json!([{"id": 1, "field1": "hello"}]); let mut tasks = Vec::new(); for _ in 0..50 { diff --git a/meilisearch/tests/index/errors.rs b/meilisearch/tests/index/errors.rs index 9c677ee12..3bab83955 100644 --- a/meilisearch/tests/index/errors.rs +++ b/meilisearch/tests/index/errors.rs @@ -75,7 +75,7 @@ async fn create_index_bad_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.uid`: `the best doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value at `.uid`: `the best doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -136,7 +136,7 @@ async fn get_index_bad_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -232,7 +232,7 @@ async fn update_index_bad_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -247,7 +247,7 @@ async fn delete_index_bad_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/index/get_index.rs b/meilisearch/tests/index/get_index.rs index 5a184c8ce..ce08251be 100644 --- a/meilisearch/tests/index/get_index.rs +++ b/meilisearch/tests/index/get_index.rs @@ -186,7 +186,7 @@ async fn get_invalid_index_uid() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "`this is not a valid index name` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`this is not a valid index name` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/logs/mod.rs b/meilisearch/tests/logs/mod.rs index 3b36d78f8..9f4649dca 100644 --- a/meilisearch/tests/logs/mod.rs +++ b/meilisearch/tests/logs/mod.rs @@ -1,10 +1,13 @@ mod error; +use std::num::NonZeroUsize; use std::rc::Rc; use std::str::FromStr; use actix_web::http::header::ContentType; +use actix_web::web::Data; use meili_snap::snapshot; +use meilisearch::search_queue::SearchQueue; use meilisearch::{analytics, create_app, Opt, SubscriberForSecondLayer}; use tracing::level_filters::LevelFilter; use tracing_subscriber::layer::SubscriberExt; @@ -40,10 +43,15 @@ async fn basic_test_log_stream_route() { .with_span_events(tracing_subscriber::fmt::format::FmtSpan::ACTIVE) .with_filter(tracing_subscriber::filter::LevelFilter::from_str("OFF").unwrap()), ); + let search_queue = SearchQueue::new( + server.service.options.experimental_search_queue_size, + NonZeroUsize::new(1).unwrap(), + ); let app = actix_web::test::init_service(create_app( server.service.index_scheduler.clone().into(), server.service.auth.clone().into(), + Data::new(search_queue), server.service.options.clone(), (route_layer_handle, stderr_layer_handle), analytics::MockAnalytics::new(&server.service.options), diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index fee7eef7d..6840f8fba 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -646,7 +646,7 @@ async fn filter_invalid_syntax_object() { .search(json!({"filter": "title & Glass"}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -669,7 +669,7 @@ async fn filter_invalid_syntax_array() { .search(json!({"filter": ["title & Glass"]}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1163,7 +1163,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1176,7 +1176,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1192,7 +1192,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1204,7 +1204,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index ee4181694..e301c0b05 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -128,7 +128,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -137,7 +137,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -146,7 +146,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -161,7 +161,7 @@ async fn limit_offset() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}), ) .await; snapshot!(code, @"200 OK"); @@ -174,7 +174,7 @@ async fn limit_offset() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}), ) .await; snapshot!(code, @"200 OK"); @@ -188,8 +188,11 @@ async fn simple_search_hf() { let server = Server::new().await; let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await; - let (response, code) = - index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await; + let (response, code) = index + .search_post( + json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}), + ) + .await; snapshot!(code, @"200 OK"); snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###); snapshot!(response["semanticHitCount"], @"0"); @@ -197,7 +200,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( // disable ranking score as the vectors between architectures are not equal - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -206,7 +209,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -215,7 +218,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -224,7 +227,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -237,7 +240,7 @@ async fn distribution_shift() { let server = Server::new().await; let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; - let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true}); + let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true}); let (response, code) = index.search_post(search.clone()).await; snapshot!(code, @"200 OK"); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); @@ -271,7 +274,7 @@ async fn highlighter() { let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 0.2}, + "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true, "attributesToHighlight": [ "desc", @@ -287,7 +290,7 @@ async fn highlighter() { let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 0.8}, + "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "retrieveVectors": true, "showRankingScore": true, "attributesToHighlight": [ @@ -304,7 +307,7 @@ async fn highlighter() { // no highlighting on full semantic let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 1.0}, + "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true, "showRankingScore": true, "attributesToHighlight": [ @@ -326,7 +329,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}), ) .await; snapshot!(code, @"400 Bad Request"); @@ -341,7 +344,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}), ) .await; snapshot!(code, @"400 Bad Request"); @@ -357,7 +360,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_get( &yaup::to_string( - &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}), + &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}), ) .unwrap(), ) @@ -375,7 +378,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_get( &yaup::to_string( - &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}), + &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}), ) .unwrap(), ) @@ -398,7 +401,7 @@ async fn single_document() { let (response, code) = index .search_post( - json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), + json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -414,7 +417,7 @@ async fn query_combination() { // search without query and vector, but with hybrid => still placeholder let (response, code) = index - .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -423,7 +426,7 @@ async fn query_combination() { // same with a different semantic ratio let (response, code) = index - .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -432,7 +435,7 @@ async fn query_combination() { // wrong vector dimensions let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"400 Bad Request"); @@ -447,7 +450,7 @@ async fn query_combination() { // full vector let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -456,7 +459,7 @@ async fn query_combination() { // full keyword, without a query let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -465,7 +468,7 @@ async fn query_combination() { // query + vector, full keyword => keyword let (response, code) = index - .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -480,7 +483,7 @@ async fn query_combination() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.", + "message": "Invalid request: missing `hybrid` parameter when `vector` is present.", "code": "missing_search_hybrid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#missing_search_hybrid" @@ -490,7 +493,7 @@ async fn query_combination() { // full vector, without a vector => error let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -507,7 +510,7 @@ async fn query_combination() { // hybrid without a vector => full keyword let (response, code) = index .search_post( - json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -523,7 +526,7 @@ async fn retrieve_vectors() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -573,7 +576,7 @@ async fn retrieve_vectors() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index dbc4fcc30..c01d854e2 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -103,41 +103,12 @@ async fn simple_search() { // english index - .search(json!({"q": "Atta", "attributesToHighlight": ["*"]}), |response, code| { + .search(json!({"q": "Atta", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "Atta", @@ -153,35 +124,12 @@ async fn simple_search() { // japanese index - .search(json!({"q": "進撃", "attributesToHighlight": ["*"]}), |response, code| { + .search(json!({"q": "進撃", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "進撃", @@ -197,68 +145,16 @@ async fn simple_search() { index .search( - json!({"q": "進撃", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "進撃", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 }, { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "進撃", @@ -275,67 +171,15 @@ async fn simple_search() { // chinese index - .search(json!({"q": "进击", "attributesToHighlight": ["*"]}), |response, code| { + .search(json!({"q": "进击", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 }, { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "进击", @@ -382,36 +226,13 @@ async fn force_locales() { // chinese detection index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -429,36 +250,13 @@ async fn force_locales() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -506,36 +304,13 @@ async fn force_locales_with_pattern() { // chinese detection index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -553,36 +328,13 @@ async fn force_locales_with_pattern() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -628,7 +380,7 @@ async fn force_locales_with_pattern_nested() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -648,60 +400,13 @@ async fn force_locales_with_pattern_nested() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "\"进击的巨人\"", @@ -750,7 +455,7 @@ async fn force_different_locales_with_pattern() { // force chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -770,36 +475,13 @@ async fn force_different_locales_with_pattern() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -851,7 +533,7 @@ async fn auto_infer_locales_at_search_with_attributes_to_search_on() { // auto infer any language index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -871,36 +553,13 @@ async fn auto_infer_locales_at_search_with_attributes_to_search_on() { // should infer chinese index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"], "attributesToSearchOn": ["name_zh", "description_zh"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["name_zh", "description_zh"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -947,36 +606,13 @@ async fn auto_infer_locales_at_search() { index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -992,37 +628,14 @@ async fn auto_infer_locales_at_search() { .await; index - .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), - |response, code| { - snapshot!(response, @r###" + .search( + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), + |response, code| { + snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -1032,43 +645,20 @@ async fn auto_infer_locales_at_search() { "estimatedTotalHits": 1 } "###); - snapshot!(code, @"200 OK"); - }, - ) - .await; + snapshot!(code, @"200 OK"); + }, + ) + .await; index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -1116,7 +706,7 @@ async fn force_different_locales_with_pattern_nested() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1136,60 +726,37 @@ async fn force_different_locales_with_pattern_nested() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "id": 852 + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["ja"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "\"进击的巨人\"", @@ -1236,7 +803,7 @@ async fn settings_change() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1256,7 +823,7 @@ async fn settings_change() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1299,7 +866,7 @@ async fn settings_change() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1319,7 +886,7 @@ async fn settings_change() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1355,7 +922,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `invalid` at `.locales[0]`: expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -1368,7 +935,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bg, bn, ca, cs, da, de, el, en, eo, es, et, fa, fi, fr, gu, he, hi, hr, hu, hy, id, it, ja, jv, ka, km, kn, ko, la, lt, lv, mk, ml, mr, my, nb, ne, nl, or, pa, pl, pt, ro, ru, si, sk, sl, sn, sr, sv, ta, te, th, tk, tl, tr, uk, ur, uz, vi, yi, zh, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zho, zul", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -1390,7 +957,7 @@ async fn invalid_localized_attributes_rules() { .await; snapshot!(response, @r###" { - "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`", "code": "invalid_settings_localized_attributes", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" @@ -1576,3 +1143,195 @@ async fn facet_search_with_localized_attributes() { } "###); } + +#[actix_rt::test] +async fn swedish_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = json!([ + {"id": "tra1-1", "product": "trä"}, + {"id": "tra2-1", "product": "traktor"}, + {"id": "tra1-2", "product": "träbjälke"}, + {"id": "tra2-2", "product": "trafiksignal"}, + ]); + index.add_documents(documents, None).await; + let (_response, _) = index + .update_settings(json!({ + "searchableAttributes": ["product"], + "localizedAttributes": [ + // force swedish + {"attributePatterns": ["product"], "locales": ["swe"]} + ] + })) + .await; + index.wait_task(1).await; + + // infer swedish + index + .search(json!({"q": "trä", "attributesToRetrieve": ["product"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "trä" + }, + { + "product": "träbjälke" + } + ], + "query": "trä", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + index + .search(json!({"q": "tra", "attributesToRetrieve": ["product"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "traktor" + }, + { + "product": "trafiksignal" + } + ], + "query": "tra", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + // force swedish + index + .search( + json!({"q": "trä", "locales": ["swe"], "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "trä" + }, + { + "product": "träbjälke" + } + ], + "query": "trä", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + index + .search( + json!({"q": "tra", "locales": ["swe"], "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "traktor" + }, + { + "product": "trafiksignal" + } + ], + "query": "tra", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} + +#[actix_rt::test] +async fn german_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = json!([ + {"id": 1, "product": "Interkulturalität"}, + {"id": 2, "product": "Wissensorganisation"}, + ]); + index.add_documents(documents, None).await; + let (_response, _) = index + .update_settings(json!({ + "searchableAttributes": ["product"], + "localizedAttributes": [ + // force swedish + {"attributePatterns": ["product"], "locales": ["deu"]} + ] + })) + .await; + index.wait_task(1).await; + + // infer swedish + index + .search( + json!({"q": "kulturalität", "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "Interkulturalität" + } + ], + "query": "kulturalität", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + index + .search( + json!({"q": "organisation", "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "Wissensorganisation" + } + ], + "query": "organisation", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 974025652..d1091d944 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -1099,22 +1099,28 @@ async fn experimental_feature_vector_store() { index.add_documents(json!(documents), None).await; index.wait_task(0).await; - index - .search(json!({ + let (response, code) = index + .search_post(json!({ "vector": [1.0, 2.0, 3.0], + "hybrid": { + "embedder": "manual", + }, "showRankingScore": true - }), |response, code|{ - meili_snap::snapshot!(code, @"400 Bad Request"); - meili_snap::snapshot!(meili_snap::json_string!(response), @r###" - { - "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", - "code": "feature_not_enabled", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#feature_not_enabled" - } - "###); - }) + })) .await; + + { + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + } + index .search(json!({ "retrieveVectors": true, @@ -1162,6 +1168,9 @@ async fn experimental_feature_vector_store() { let (response, code) = index .search_post(json!({ "vector": [1.0, 2.0, 3.0], + "hybrid": { + "embedder": "manual", + }, "showRankingScore": true, "retrieveVectors": true, })) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 08ad0b18c..eaa1da15f 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -412,7 +412,7 @@ async fn simple_search_illegal_index_uid() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, @r###" { - "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -437,7 +437,7 @@ async fn federation_search_illegal_index_uid() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, @r###" { - "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -3799,7 +3799,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `limit` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `limit` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3815,7 +3815,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `offset` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `offset` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3831,7 +3831,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `page` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `page` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3847,7 +3847,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `hitsPerPage` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `hitsPerPage` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3855,6 +3855,214 @@ async fn federation_federated_contains_pagination() { "###); } +#[actix_rt::test] +async fn federation_federated_contains_facets() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // empty facets are actually OK + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "facets": []}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "name": "Red apple gala", + "id": "red-apple-gala", + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.953042328042328 + } + }, + { + "name": "Exclusive sale: Red delicious apple", + "id": "red-delicious-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.9093915343915344 + } + }, + { + "name": "Exclusive sale: green apple", + "id": "green-apple-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.4393939393939394 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); + + // fails + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "facets": ["BOOSTED"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #1 or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.fruits: [\"BOOSTED\"]` for facets in federated search", + "code": "invalid_multi_search_query_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_facets" + } + "###); +} + +#[actix_rt::test] +async fn federation_non_faceted_for_an_index() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id", "name"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let index = server.index("fruits-no-name"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let index = server.index("fruits-no-facets"); + + let (value, _) = index.update_settings(json!({"searchableAttributes": ["name"]})).await; + + index.wait_task(value.uid()).await; + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits-no-name", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`", + "code": "invalid_multi_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" + } + "###); + + // still fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries", + "code": "invalid_multi_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" + } + "###); + + // fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id"], + "fruits-no-facets": ["BOOST", "id"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n - Note: index `fruits-no-facets` is not used in queries", + "code": "invalid_multi_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" + } + "###); + + // also fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "zorglub": ["BOOST", "id", "name"], + "fruits": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.zorglub`: Index `zorglub` not found.\n - Note: index `zorglub` is not used in queries", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + "###); +} + #[actix_rt::test] async fn federation_non_federated_contains_federation_option() { let server = Server::new().await; @@ -3875,7 +4083,7 @@ async fn federation_non_federated_contains_federation_option() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #1 or add `federation: {}` to the request.", + "message": "Inside `.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #1 or add `federation` to the request.", "code": "invalid_multi_search_federation_options", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_federation_options" @@ -4433,3 +4641,1328 @@ async fn federation_vector_two_indexes() { } "###); } + +#[actix_rt::test] +async fn federation_facets_different_indexes_same_facet() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman-2"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // return titles ordered accross indexes + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "batman-2": ["title"], + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "batman-2": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3, + "yellow": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title"], + "batman": ["title"], + "batman-2": ["title"] + }, + "mergeFacets": {} + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "title": { + "Badman": 2, + "Batman": 2, + "Batman Returns": 2, + "Batman the dark knight returns: Part 1": 2, + "Batman the dark knight returns: Part 2": 2, + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "facetStats": {} + } + "###); + + // mix and match query: will be sorted across indexes + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": [], + "batman": ["title"], + "batman-2": ["title"] + } + }, "queries": [ + {"indexUid" : "batman", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "the bat", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 2, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 3, + "weightedRankingScore": 0.9528218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.7028218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 3, + "weightedRankingScore": 0.9528218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.7028218694885362 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.8317901234567902 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.23106060606060605 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.23106060606060605 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.5 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.5 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 11, + "facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "batman-2": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": {}, + "stats": {} + } + } + } + "###); +} + +#[actix_rt::test] +async fn federation_facets_same_indexes() { + let server = Server::new().await; + + let index = server.index("doggos"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "filterableAttributes": ["father", "mother", "doggos.age"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("doggos-2"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "filterableAttributes": ["father", "mother", "doggos.age"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"] + } + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "facetsByIndex": { + "doggos": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1, + "5": 1, + "6": 1 + }, + "father": { + "jean": 1, + "jean-baptiste": 1, + "romain": 1 + }, + "mother": { + "michelle": 2, + "sophie": 1 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"], + "doggos-2": ["father", "mother", "doggos.age"] + } + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 852, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "facetsByIndex": { + "doggos": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1, + "5": 1, + "6": 1 + }, + "father": { + "jean": 1, + "jean-baptiste": 1 + }, + "mother": { + "michelle": 1, + "sophie": 1 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + }, + "doggos-2": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1 + }, + "father": { + "jean": 1, + "romain": 1 + }, + "mother": { + "michelle": 2 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 4.0 + } + } + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"], + "doggos-2": ["father", "mother", "doggos.age"] + }, + "mergeFacets": {}, + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 852, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "facetDistribution": { + "doggos.age": { + "2": 2, + "4": 2, + "5": 1, + "6": 1 + }, + "father": { + "jean": 2, + "jean-baptiste": 1, + "romain": 1 + }, + "mother": { + "michelle": 3, + "sophie": 1 + } + }, + "facetStats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + } + "###); +} + +#[actix_rt::test] +async fn federation_inconsistent_merge_order() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("movies-2"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "faceting": { + "sortFacetValuesBy": { "color": "count" } + } + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // without merging, it works + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3, + "yellow": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + }, + "movies-2": { + "distribution": { + "color": { + "red": 3, + "blue": 3, + "yellow": 2, + "green": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + } + } + } + "###); + + // fails with merging + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": {} + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n - Note: index `movies-2` used in `.queries[2]`", + "code": "invalid_multi_search_facet_order", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" + } + "###); + + // can limit the number of values + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title"], + }, + "mergeFacets": { + "maxValuesPerFacet": 3, + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3 + }, + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1 + } + }, + "facetStats": {} + } + "###); +} diff --git a/meilisearch/tests/search/search_queue.rs b/meilisearch/tests/search/search_queue.rs index 3b4fbf252..498b741e5 100644 --- a/meilisearch/tests/search/search_queue.rs +++ b/meilisearch/tests/search/search_queue.rs @@ -37,6 +37,43 @@ async fn search_queue_register() { .unwrap(); } +#[actix_rt::test] +async fn search_queue_register_with_explicit_drop() { + let queue = SearchQueue::new(4, NonZeroUsize::new(2).unwrap()); + + // First, use all the cores + let permit1 = queue.try_get_search_permit().await.unwrap(); + let _permit2 = queue.try_get_search_permit().await.unwrap(); + + // If we free one spot we should be able to register one new search + permit1.drop().await; + + let permit3 = queue.try_get_search_permit().await.unwrap(); + + // And again + permit3.drop().await; + + let _permit4 = queue.try_get_search_permit().await.unwrap(); +} + +#[actix_rt::test] +async fn search_queue_register_with_time_to_abort() { + let queue = Arc::new( + SearchQueue::new(1, NonZeroUsize::new(1).unwrap()) + .with_time_to_abort(Duration::from_secs(1)), + ); + + // First, use all the cores + let permit1 = queue.try_get_search_permit().await.unwrap(); + let q = queue.clone(); + let permit2 = tokio::task::spawn(async move { q.try_get_search_permit().await }); + tokio::time::sleep(Duration::from_secs(1)).await; + permit1.drop().await; + let ret = permit2.await.unwrap(); + + snapshot!(ret.unwrap_err(), @"Too many search requests running at the same time: 1. Retry after 10s."); +} + #[actix_rt::test] async fn wait_till_cores_are_available() { let queue = Arc::new(SearchQueue::new(4, NonZeroUsize::new(1).unwrap())); diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 1571b8ca6..6de0db0b3 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -190,7 +190,8 @@ async fn secrets_are_hidden_in_settings() { "source": "rest", "apiKey": "My suXXXXXX...", "dimensions": 4, - "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", + "documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}", + "documentTemplateMaxBytes": 400, "url": "https://localhost:7777", "request": "{{text}}", "response": "{{embedding}}", @@ -329,7 +330,7 @@ async fn error_update_setting_unexisting_index_invalid_uid() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "`test##! ` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "`test##! ` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/similar/errors.rs b/meilisearch/tests/similar/errors.rs index d0be6562f..1e933e1c0 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/meilisearch/tests/similar/errors.rs @@ -18,7 +18,7 @@ async fn similar_unexisting_index() { }); index - .similar(json!({"id": 287947}), |response, code| { + .similar(json!({"id": 287947, "embedder": "manual"}), |response, code| { assert_eq!(code, 404); assert_eq!(response, expected_response); }) @@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.similar_post(json!({"id": 287947})).await; + let (response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -79,7 +79,7 @@ async fn similar_bad_id() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", + "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_similar_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_id" @@ -172,7 +172,7 @@ async fn similar_invalid_id() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", + "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_similar_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_id" @@ -199,7 +199,8 @@ async fn similar_not_found_id() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await; + let (response, code) = + index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -230,7 +231,8 @@ async fn similar_bad_offset() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -241,7 +243,7 @@ async fn similar_bad_offset() { } "###); - let (response, code) = index.similar_get("?id=287947&offset=doggo").await; + let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -272,7 +274,8 @@ async fn similar_bad_limit() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -283,7 +286,7 @@ async fn similar_bad_limit() { } "###); - let (response, code) = index.similar_get("?id=287946&limit=doggo").await; + let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -323,7 +326,8 @@ async fn similar_bad_filter() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await; - let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await; + let (response, code) = + index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -361,10 +365,10 @@ async fn filter_invalid_syntax_object() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| { + .similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" @@ -400,10 +404,10 @@ async fn filter_invalid_syntax_array() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| { + .similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" @@ -446,7 +450,7 @@ async fn filter_invalid_syntax_string() { }); index .similar( - json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}), + json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}), |response, code| { assert_eq!(response, expected_response); assert_eq!(code, 400); @@ -486,10 +490,13 @@ async fn filter_invalid_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -524,10 +531,13 @@ async fn filter_invalid_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -562,10 +572,13 @@ async fn filter_reserved_geo_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -600,10 +613,13 @@ async fn filter_reserved_geo_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -638,10 +654,13 @@ async fn filter_reserved_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -676,10 +695,13 @@ async fn filter_reserved_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geoDistance = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -714,10 +736,13 @@ async fn filter_reserved_geo_point_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -752,10 +777,13 @@ async fn filter_reserved_geo_point_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -765,7 +793,8 @@ async fn similar_bad_retrieve_vectors() { server.set_features(json!({"vectorStore": true})).await; let index = server.index("test"); - let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -776,7 +805,8 @@ async fn similar_bad_retrieve_vectors() { } "###); - let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await; + let (response, code) = + index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { diff --git a/meilisearch/tests/similar/mod.rs b/meilisearch/tests/similar/mod.rs index b4c95b059..fa0797a41 100644 --- a/meilisearch/tests/similar/mod.rs +++ b/meilisearch/tests/similar/mod.rs @@ -80,9 +80,11 @@ async fn basic() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "Escape Room", @@ -154,13 +156,16 @@ async fn basic() { } ] "###); - }) + }, + ) .await; index - .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "How to Train Your Dragon: The Hidden World", @@ -232,7 +237,8 @@ async fn basic() { } ] "###); - }) + }, + ) .await; } @@ -272,7 +278,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4"); @@ -358,7 +364,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3"); @@ -426,7 +432,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2"); @@ -476,7 +482,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1"); @@ -508,7 +514,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @"[]"); @@ -553,7 +559,7 @@ async fn filter() { index .similar( - json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}), + json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" @@ -617,7 +623,7 @@ async fn filter() { index .similar( - json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}), + json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" @@ -681,9 +687,11 @@ async fn limit_and_offset() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "Escape Room", @@ -704,12 +712,13 @@ async fn limit_and_offset() { } ] "###); - }) + }, + ) .await; index .similar( - json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}), + json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" diff --git a/meilisearch/tests/tasks/errors.rs b/meilisearch/tests/tasks/errors.rs index c404a2329..42ec42997 100644 --- a/meilisearch/tests/tasks/errors.rs +++ b/meilisearch/tests/tasks/errors.rs @@ -173,7 +173,7 @@ async fn task_bad_index_uids() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -184,7 +184,7 @@ async fn task_bad_index_uids() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" @@ -195,7 +195,7 @@ async fn task_bad_index_uids() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).", + "message": "Invalid value in parameter `indexUids`: `the good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes.", "code": "invalid_index_uid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_index_uid" diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs new file mode 100644 index 000000000..d3fe3c824 --- /dev/null +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -0,0 +1,380 @@ +use meili_snap::{json_string, snapshot}; + +use crate::common::{GetAllDocumentsOptions, Server}; +use crate::json; +use crate::vector::generate_default_user_provided_documents; + +#[actix_rt::test] +async fn retrieve_binary_quantize_status_in_the_settings() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###); +} + +#[actix_rt::test] +async fn binary_quantize_before_sending_documents() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, + {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await.succeeded(); + + // Make sure the documents are binary quantized + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "manual": { + "embeddings": [ + [ + -1.0, + -1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "manual": { + "embeddings": [ + [ + 1.0, + 1.0, + -1.0 + ] + ], + "regenerate": false + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); +} + +#[actix_rt::test] +async fn binary_quantize_after_sending_documents() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, + {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await.succeeded(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + // Make sure the documents are binary quantized + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "manual": { + "embeddings": [ + [ + -1.0, + -1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "manual": { + "embeddings": [ + [ + 1.0, + 1.0, + -1.0 + ] + ], + "regenerate": false + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); +} + +#[actix_rt::test] +async fn try_to_disable_binary_quantization() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let ret = server.wait_task(response.uid()).await; + snapshot!(ret, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false + } + } + }, + "error": { + "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_rt::test] +async fn binary_quantize_clear_documents() { + let server = Server::new().await; + let index = generate_default_user_provided_documents(&server).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (value, _code) = index.clear_all_documents().await; + index.wait_task(value.uid()).await.succeeded(); + + // Make sure the documents DB has been cleared + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [], + "offset": 0, + "limit": 20, + "total": 0 + } + "###); + + // Make sure the arroy DB has been cleared + let (documents, _code) = + index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await; + snapshot!(documents, @r###" + { + "hits": [], + "query": "", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0, + "semanticHitCount": 0 + } + "###); +} diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 7c9b375d9..47d0c1051 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -1,3 +1,4 @@ +mod binary_quantized; mod openai; mod rest; mod settings; @@ -624,7 +625,8 @@ async fn clear_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await; snapshot!(documents, @r###" { "hits": [], @@ -685,7 +687,11 @@ async fn add_remove_one_vector_4588() { let task = index.wait_task(value.uid()).await; snapshot!(task, name: "document-deleted"); - let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await; + let (documents, _code) = index + .search_post( + json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }), + ) + .await; snapshot!(documents, @r###" { "hits": [ diff --git a/meilisearch/tests/vector/openai.rs b/meilisearch/tests/vector/openai.rs index f350abbe1..04c068c40 100644 --- a/meilisearch/tests/vector/openai.rs +++ b/meilisearch/tests/vector/openai.rs @@ -302,7 +302,8 @@ async fn create_mock_with_template( "source": "openAi", "url": url, "apiKey": API_KEY, - "documentTemplate": document_template + "documentTemplate": document_template, + "documentTemplateMaxBytes": 8000000, }); model_dimensions.add_to_settings(&mut embedder_settings); @@ -448,7 +449,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, })) .await; snapshot!(code, @"200 OK"); @@ -488,7 +489,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -528,7 +529,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -615,7 +616,7 @@ async fn tokenize_long_text() { "q": "grand chien de berger des montagnes", "showRankingScore": true, "attributesToRetrieve": ["id"], - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -693,6 +694,7 @@ async fn bad_api_key() { "model": "text-embedding-3-large", "apiKey": "XXX...", "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.", + "documentTemplateMaxBytes": 8000000, "url": "[url]" } } @@ -735,6 +737,7 @@ async fn bad_api_key() { "source": "openAi", "model": "text-embedding-3-large", "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.", + "documentTemplateMaxBytes": 8000000, "url": "[url]" } } @@ -1061,7 +1064,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1101,7 +1104,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1141,7 +1144,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1292,7 +1295,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1332,7 +1335,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1372,7 +1375,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1522,7 +1525,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1562,7 +1565,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1602,7 +1605,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1753,7 +1756,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1793,7 +1796,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1833,7 +1836,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); diff --git a/meilisearch/tests/vector/rest.rs b/meilisearch/tests/vector/rest.rs index 1a64eeb78..2748d0846 100644 --- a/meilisearch/tests/vector/rest.rs +++ b/meilisearch/tests/vector/rest.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeMap; use std::sync::atomic::{AtomicUsize, Ordering}; use meili_snap::{json_string, snapshot}; @@ -37,6 +38,46 @@ async fn create_mock() -> (MockServer, Value) { (mock_server, embedder_settings) } +async fn create_mock_map() -> (MockServer, Value) { + let mock_server = MockServer::start().await; + + let text_to_embedding: BTreeMap<_, _> = vec![ + // text -> embedding + ("name: kefir\n", [0.0, 0.1, 0.2]), + ] + // turn into btree + .into_iter() + .collect(); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |req: &Request| { + let text: String = req.body_json().unwrap(); + match text_to_embedding.get(text.as_str()) { + Some(embedding) => { + ResponseTemplate::new(200).set_body_json(json!({ "data": embedding })) + } + None => ResponseTemplate::new(404) + .set_body_json(json!({"error": "text not found", "text": text})), + } + }) + .mount(&mock_server) + .await; + let url = mock_server.uri(); + + let embedder_settings = json!({ + "source": "rest", + "url": url, + "dimensions": 3, + "request": "{{text}}", + "response": { + "data": "{{embedding}}" + } + }); + + (mock_server, embedder_settings) +} + #[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] struct MultipleRequest { input: Vec, @@ -1100,6 +1141,7 @@ async fn server_returns_bad_request() { let (response, code) = index .update_settings(json!({ + "searchableAttributes": ["name", "missing_field"], "embedders": { "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}", "dimensions": 3 }), }, @@ -1115,6 +1157,10 @@ async fn server_returns_bad_request() { "type": "settingsUpdate", "canceledBy": null, "details": { + "searchableAttributes": [ + "name", + "missing_field" + ], "embedders": { "rest": { "source": "rest", @@ -1148,7 +1194,7 @@ async fn server_returns_bad_request() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\" id: 1\\\\n name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 24\"}`", + "message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1891,3 +1937,109 @@ async fn server_custom_header() { } "###); } + +#[actix_rt::test] +async fn searchable_reindex() { + let (_mock, setting) = create_mock_map().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "searchableAttributes": ["name", "missing_field"], + "embedders": { + "rest": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "searchableAttributes": [ + "name", + "missing_field" + ], + "embedders": { + "rest": { + "source": "rest", + "dimensions": 3, + "url": "[url]", + "request": "{{text}}", + "response": { + "data": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = + index.add_documents(json!( { "id": 1, "name": "kefir", "breed": "patou" }), None).await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // triggers reindexing with the new searchable attribute. + // as the mock intentionally doesn't know of this text, the task will fail, outputting the putative rendered text. + let (response, code) = index + .update_settings(json!({ + "searchableAttributes": ["breed"], + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "searchableAttributes": [ + "breed" + ] + }, + "error": { + "message": "While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} diff --git a/meilisearch/tests/vector/settings.rs b/meilisearch/tests/vector/settings.rs index 0714a22ca..4f07ca18b 100644 --- a/meilisearch/tests/vector/settings.rs +++ b/meilisearch/tests/vector/settings.rs @@ -218,7 +218,8 @@ async fn reset_embedder_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await; snapshot!(json_string!(documents), @r###" { "message": "Cannot find embedder with name `default`.", diff --git a/meilitool/Cargo.toml b/meilitool/Cargo.toml index bf68b219e..ce6c1ad5b 100644 --- a/meilitool/Cargo.toml +++ b/meilitool/Cargo.toml @@ -15,5 +15,6 @@ dump = { path = "../dump" } file-store = { path = "../file-store" } meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } +serde = { version = "1.0.209", features = ["derive"] } time = { version = "0.3.36", features = ["formatting"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index 06c4890a5..9dbff2486 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -2,7 +2,7 @@ use std::fs::{read_dir, read_to_string, remove_file, File}; use std::io::BufWriter; use std::path::PathBuf; -use anyhow::Context; +use anyhow::{bail, Context}; use clap::{Parser, Subcommand}; use dump::{DumpWriter, IndexMetadata}; use file_store::FileStore; @@ -10,9 +10,10 @@ use meilisearch_auth::AuthController; use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader}; +use meilisearch_types::milli::index::{db_name, main_key}; use meilisearch_types::milli::{obkv_to_json, BEU32}; use meilisearch_types::tasks::{Status, Task}; -use meilisearch_types::versioning::check_version_file; +use meilisearch_types::versioning::{create_version_file, get_version, parse_version}; use meilisearch_types::Index; use time::macros::format_description; use time::OffsetDateTime; @@ -62,21 +63,458 @@ enum Command { #[arg(long)] skip_enqueued_tasks: bool, }, + + /// Attempts to upgrade from one major version to the next without a dump. + /// + /// Make sure to run this commmand when Meilisearch is not running! + /// If Meilisearch is running while executing this command, the database could be corrupted + /// (contain data from both the old and the new versions) + /// + /// Supported upgrade paths: + /// + /// - v1.9.0 -> v1.10.0 + OfflineUpgrade { + #[arg(long)] + target_version: String, + }, } fn main() -> anyhow::Result<()> { let Cli { db_path, command } = Cli::parse(); - check_version_file(&db_path).context("While checking the version file")?; + let detected_version = get_version(&db_path).context("While checking the version file")?; match command { Command::ClearTaskQueue => clear_task_queue(db_path), Command::ExportADump { dump_dir, skip_enqueued_tasks } => { export_a_dump(db_path, dump_dir, skip_enqueued_tasks) } + Command::OfflineUpgrade { target_version } => { + let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?; + OfflineUpgrade { db_path, current_version: detected_version, target_version }.upgrade() + } } } +struct OfflineUpgrade { + db_path: PathBuf, + current_version: (String, String, String), + target_version: (String, String, String), +} + +impl OfflineUpgrade { + fn upgrade(self) -> anyhow::Result<()> { + // TODO: if we make this process support more versions, introduce a more flexible way of checking for the version + // currently only supports v1.9 to v1.10 + let (current_major, current_minor, current_patch) = &self.current_version; + + match (current_major.as_str(), current_minor.as_str(), current_patch.as_str()) { + ("1", "9", _) => {} + _ => { + bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9") + } + } + + let (target_major, target_minor, target_patch) = &self.target_version; + + match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { + ("1", "10", _) => {} + _ => { + bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10") + } + } + + println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); + + self.v1_9_to_v1_10()?; + + println!("Writing VERSION file"); + + create_version_file(&self.db_path, target_major, target_minor, target_patch) + .context("while writing VERSION file after the upgrade")?; + + println!("Success"); + + Ok(()) + } + + fn v1_9_to_v1_10(&self) -> anyhow::Result<()> { + // 2 changes here + + // 1. date format. needs to be done before opening the Index + // 2. REST embedders. We don't support this case right now, so bail + + let index_scheduler_path = self.db_path.join("tasks"); + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + .with_context(|| { + format!("While trying to open {:?}", index_scheduler_path.display()) + })?; + + let mut sched_wtxn = env.write_txn()?; + + let index_mapping: Database = + try_opening_database(&env, &sched_wtxn, "index-mapping")?; + + let index_stats: Database = + try_opening_database(&env, &sched_wtxn, "index-stats").with_context(|| { + format!("While trying to open {:?}", index_scheduler_path.display()) + })?; + + let index_count = + index_mapping.len(&sched_wtxn).context("while reading the number of indexes")?; + + // FIXME: not ideal, we have to pre-populate all indexes to prevent double borrow of sched_wtxn + // 1. immutably for the iteration + // 2. mutably for updating index stats + let indexes: Vec<_> = index_mapping + .iter(&sched_wtxn)? + .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid))) + .collect(); + + let mut rest_embedders = Vec::new(); + + let mut unwrapped_indexes = Vec::new(); + + // check that update can take place + for (index_index, result) in indexes.into_iter().enumerate() { + let (uid, uuid) = result?; + let index_path = self.db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Checking that update can take place for `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? + }; + + let index_txn = index_env.read_txn().with_context(|| { + format!( + "while obtaining a write transaction for index {uid} at {}", + index_path.display() + ) + })?; + + println!("\t- Checking for incompatible embedders (REST embedders)"); + let rest_embedders_for_index = find_rest_embedders(&uid, &index_env, &index_txn)?; + + if rest_embedders_for_index.is_empty() { + unwrapped_indexes.push((uid, uuid)); + } else { + // no need to add to unwrapped indexes because we'll exit early + rest_embedders.push((uid, rest_embedders_for_index)); + } + } + + if !rest_embedders.is_empty() { + let rest_embedders = rest_embedders + .into_iter() + .flat_map(|(index, embedders)| std::iter::repeat(index.clone()).zip(embedders)) + .map(|(index, embedder)| format!("\t- embedder `{embedder}` in index `{index}`")) + .collect::>() + .join("\n"); + bail!("The update cannot take place because there are REST embedder(s). Remove them before proceeding with the update:\n{rest_embedders}\n\n\ + The database has not been modified and is still a valid v1.9 database."); + } + + println!("Update can take place, updating"); + + for (index_index, (uid, uuid)) in unwrapped_indexes.into_iter().enumerate() { + let index_path = self.db_path.join("indexes").join(uuid.to_string()); + + println!( + "[{}/{index_count}]Updating index `{uid}` at `{}`", + index_index + 1, + index_path.display() + ); + + let index_env = unsafe { + // FIXME: fetch the 25 magic number from the index file + EnvOpenOptions::new().max_dbs(25).open(&index_path).with_context(|| { + format!("while opening index {uid} at '{}'", index_path.display()) + })? + }; + + let mut index_wtxn = index_env.write_txn().with_context(|| { + format!( + "while obtaining a write transaction for index `{uid}` at `{}`", + index_path.display() + ) + })?; + + println!("\t- Updating index stats"); + update_index_stats(index_stats, &uid, uuid, &mut sched_wtxn)?; + println!("\t- Updating date format"); + update_date_format(&uid, &index_env, &mut index_wtxn)?; + + index_wtxn.commit().with_context(|| { + format!( + "while committing the write txn for index `{uid}` at {}", + index_path.display() + ) + })?; + } + + sched_wtxn.commit().context("while committing the write txn for the index-scheduler")?; + + println!("Upgrading database succeeded"); + + Ok(()) + } +} + +pub mod v1_9 { + pub type FieldDistribution = std::collections::BTreeMap; + + /// The statistics that can be computed from an `Index` object. + #[derive(serde::Serialize, serde::Deserialize, Debug)] + pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. + pub created_at: time::OffsetDateTime, + /// Date of the last update of the index. + pub updated_at: time::OffsetDateTime, + } + + use serde::{Deserialize, Serialize}; + + #[derive(Debug, Deserialize, Serialize)] + pub struct IndexEmbeddingConfig { + pub name: String, + pub config: EmbeddingConfig, + } + + #[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)] + pub struct EmbeddingConfig { + /// Options of the embedder, specific to each kind of embedder + pub embedder_options: EmbedderOptions, + } + + /// Options of an embedder, specific to each kind of embedder. + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub enum EmbedderOptions { + HuggingFace(hf::EmbedderOptions), + OpenAi(openai::EmbedderOptions), + Ollama(ollama::EmbedderOptions), + UserProvided(manual::EmbedderOptions), + Rest(rest::EmbedderOptions), + } + + impl Default for EmbedderOptions { + fn default() -> Self { + Self::OpenAi(openai::EmbedderOptions { api_key: None, dimensions: None }) + } + } + + mod hf { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub model: String, + pub revision: Option, + } + } + mod openai { + + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub api_key: Option, + pub dimensions: Option, + } + } + mod ollama { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub embedding_model: String, + pub url: Option, + pub api_key: Option, + } + } + mod manual { + #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] + pub struct EmbedderOptions { + pub dimensions: usize, + } + } + mod rest { + #[derive(Debug, Clone, PartialEq, Eq, serde::Deserialize, serde::Serialize, Hash)] + pub struct EmbedderOptions { + pub api_key: Option, + pub dimensions: Option, + pub url: String, + pub input_field: Vec, + // path to the array of embeddings + pub path_to_embeddings: Vec, + // shape of a single embedding + pub embedding_object: Vec, + } + } + + pub type OffsetDateTime = time::OffsetDateTime; +} + +pub mod v1_10 { + use crate::v1_9; + + pub type FieldDistribution = std::collections::BTreeMap; + + /// The statistics that can be computed from an `Index` object. + #[derive(serde::Serialize, serde::Deserialize, Debug)] + pub struct IndexStats { + /// Number of documents in the index. + pub number_of_documents: u64, + /// Size taken up by the index' DB, in bytes. + /// + /// This includes the size taken by both the used and free pages of the DB, and as the free pages + /// are not returned to the disk after a deletion, this number is typically larger than + /// `used_database_size` that only includes the size of the used pages. + pub database_size: u64, + /// Size taken by the used pages of the index' DB, in bytes. + /// + /// As the DB backend does not return to the disk the pages that are not currently used by the DB, + /// this value is typically smaller than `database_size`. + pub used_database_size: u64, + /// Association of every field name with the number of times it occurs in the documents. + pub field_distribution: FieldDistribution, + /// Creation date of the index. + #[serde(with = "time::serde::rfc3339")] + pub created_at: time::OffsetDateTime, + /// Date of the last update of the index. + #[serde(with = "time::serde::rfc3339")] + pub updated_at: time::OffsetDateTime, + } + + impl From for IndexStats { + fn from( + v1_9::IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at, + updated_at, + }: v1_9::IndexStats, + ) -> Self { + IndexStats { + number_of_documents, + database_size, + used_database_size, + field_distribution, + created_at, + updated_at, + } + } + } + + #[derive(serde::Serialize, serde::Deserialize)] + #[serde(transparent)] + pub struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] pub time::OffsetDateTime); +} + +fn update_index_stats( + index_stats: Database, + index_uid: &str, + index_uuid: uuid::Uuid, + sched_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let ctx = || format!("while updating index stats for index `{index_uid}`"); + + let stats: Option = index_stats + .remap_data_type::>() + .get(sched_wtxn, &index_uuid) + .with_context(ctx)?; + + if let Some(stats) = stats { + let stats: v1_10::IndexStats = stats.into(); + + index_stats + .remap_data_type::>() + .put(sched_wtxn, &index_uuid, &stats) + .with_context(ctx)?; + } + + Ok(()) +} + +fn update_date_format( + index_uid: &str, + index_env: &Env, + index_wtxn: &mut RwTxn, +) -> anyhow::Result<()> { + let main = try_opening_poly_database(index_env, index_wtxn, db_name::MAIN) + .with_context(|| format!("while updating date format for index `{index_uid}`"))?; + + date_round_trip(index_wtxn, index_uid, main, main_key::CREATED_AT_KEY)?; + date_round_trip(index_wtxn, index_uid, main, main_key::UPDATED_AT_KEY)?; + + Ok(()) +} + +fn find_rest_embedders( + index_uid: &str, + index_env: &Env, + index_txn: &RoTxn, +) -> anyhow::Result> { + let main = try_opening_poly_database(index_env, index_txn, db_name::MAIN) + .with_context(|| format!("while checking REST embedders for index `{index_uid}`"))?; + + let mut rest_embedders = vec![]; + + for config in main + .remap_types::>>() + .get(index_txn, main_key::EMBEDDING_CONFIGS)? + .unwrap_or_default() + { + if let v1_9::EmbedderOptions::Rest(_) = config.config.embedder_options { + rest_embedders.push(config.name); + } + } + + Ok(rest_embedders) +} + +fn date_round_trip( + wtxn: &mut RwTxn, + index_uid: &str, + db: Database, + key: &str, +) -> anyhow::Result<()> { + let datetime = + db.remap_types::>().get(wtxn, key).with_context( + || format!("could not read `{key}` while updating date format for index `{index_uid}`"), + )?; + + if let Some(datetime) = datetime { + db.remap_types::>() + .put(wtxn, key, &v1_10::OffsetDateTime(datetime)) + .with_context(|| { + format!( + "could not write `{key}` while updating date format for index `{index_uid}`" + ) + })?; + } + + Ok(()) +} + /// Clears the task queue located at `db_path`. fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { let path = db_path.join("tasks"); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 79b61b4f1..5fc2d65c8 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -17,7 +17,7 @@ bincode = "1.3.3" bstr = "1.9.1" bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } byteorder = "1.5.0" -charabia = { version = "0.9.0", default-features = false } +charabia = { version = "0.9.1", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.13" deserr = "0.6.2" @@ -80,7 +80,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", tiktoken-rs = "0.5.9" liquid = "0.26.6" rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } -arroy = "0.4.0" +arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.10.0", features = ["json"] } @@ -106,6 +106,8 @@ all-tokenizations = [ "charabia/greek", "charabia/khmer", "charabia/vietnamese", + "charabia/swedish-recomposition", + "charabia/german-segmentation", ] # Use POSIX semaphores instead of SysV semaphores in LMDB @@ -138,6 +140,9 @@ khmer = ["charabia/khmer"] # allow vietnamese specialized tokenization vietnamese = ["charabia/vietnamese"] +# allow german specialized tokenization +german = ["charabia/german-segmentation"] + # force swedish character recomposition swedish-recomposition = ["charabia/swedish-recomposition"] diff --git a/milli/src/documents/primary_key.rs b/milli/src/documents/primary_key.rs index 64131af40..9ac5ace91 100644 --- a/milli/src/documents/primary_key.rs +++ b/milli/src/documents/primary_key.rs @@ -150,12 +150,13 @@ fn starts_with(selector: &str, key: &str) -> bool { // FIXME: move to a DocumentId struct fn validate_document_id(document_id: &str) -> Option<&str> { - if !document_id.is_empty() - && document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_')) + if document_id.is_empty() + || document_id.len() > 512 + || !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') { - Some(document_id) - } else { None + } else { + Some(document_id) } } @@ -166,6 +167,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult Ok(s.to_string()), None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }), }, + // a `u64` or `i64` cannot be more than 512 bytes once converted to a string Value::Number(number) if !number.is_f64() => Ok(number.to_string()), content => Err(UserError::InvalidDocumentId { document_id: content }), } diff --git a/milli/src/error.rs b/milli/src/error.rs index 47a497292..f4bd42689 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -106,7 +106,8 @@ pub enum UserError { #[error( "Document identifier `{}` is invalid. \ A document identifier can be of type integer or string, \ -only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_).", .document_id.to_string() +only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \ +and can not be more than 512 bytes.", .document_id.to_string() )] InvalidDocumentId { document_id: Value }, #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))] @@ -258,6 +259,12 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco }, #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] InvalidSettingsDimensions { embedder_name: String }, + #[error( + "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors." + )] + InvalidDisableBinaryQuantization { embedder_name: String }, + #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] + InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String }, #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String }, #[error("Document editions cannot modify a document's primary key")] diff --git a/milli/src/index.rs b/milli/src/index.rs index 512e911aa..c47896df7 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; -use crate::vector::{Embedding, EmbeddingConfig}; +use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -162,7 +162,7 @@ pub struct Index { /// Maps an embedder name to its id in the arroy store. pub embedder_category_id: Database, /// Vector store based on arroy™. - pub vector_arroy: arroy::Database, + pub vector_arroy: arroy::Database, /// Maps the document id to the document as an obkv store. pub(crate) documents: Database, @@ -1614,15 +1614,17 @@ impl Index { &'a self, rtxn: &'a RoTxn<'a>, embedder_id: u8, - ) -> impl Iterator>> + 'a { + quantized: bool, + ) -> impl Iterator> + 'a { crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { - arroy::Reader::open(rtxn, k, self.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e.into()), - }) - .transpose() + let reader = ArroyWrapper::new(self.vector_arroy, k, quantized); + // Here we don't care about the dimensions, but we want to know if we can read + // in the database or if its metadata are missing because there is no document with that many vectors. + match reader.dimensions(rtxn) { + Ok(_) => Some(Ok(reader)), + Err(arroy::Error::MissingMetadata(_)) => None, + Err(e) => Some(Err(e.into())), + } }) } @@ -1644,32 +1646,18 @@ impl Index { docid: DocumentId, ) -> Result>> { let mut res = BTreeMap::new(); - for row in self.embedder_category_id.iter(rtxn)? { - let (embedder_name, embedder_id) = row?; - let embedder_id = (embedder_id as u16) << 8; - let mut embeddings = Vec::new(); - 'vectors: for i in 0..=u8::MAX { - let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e), - }) - .transpose(); - - let Some(reader) = reader else { - break 'vectors; - }; - - let embedding = reader?.item_vector(rtxn, docid)?; - if let Some(embedding) = embedding { - embeddings.push(embedding) - } else { - break 'vectors; - } - } - - res.insert(embedder_name.to_owned(), embeddings); + let embedding_configs = self.embedding_configs(rtxn)?; + for config in embedding_configs { + let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); + let embeddings = self + .arroy_readers(rtxn, embedder_id, config.config.quantized()) + .map_while(|reader| { + reader + .and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into())) + .transpose() + }) + .collect::>>()?; + res.insert(config.name.to_owned(), embeddings); } Ok(res) } diff --git a/milli/src/prompt/context.rs b/milli/src/prompt/context.rs index a28a87caa..7ab08301a 100644 --- a/milli/src/prompt/context.rs +++ b/milli/src/prompt/context.rs @@ -5,7 +5,7 @@ use liquid::{ObjectView, ValueView}; use super::document::Document; use super::fields::Fields; -use crate::FieldsIdsMap; +use super::FieldsIdsMapWithMetadata; #[derive(Debug, Clone)] pub struct Context<'a> { @@ -14,7 +14,7 @@ pub struct Context<'a> { } impl<'a> Context<'a> { - pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self { + pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self { Self { document, fields: Fields::new(document, field_id_map) } } } diff --git a/milli/src/prompt/fields.rs b/milli/src/prompt/fields.rs index 3187485f1..81ea88ca6 100644 --- a/milli/src/prompt/fields.rs +++ b/milli/src/prompt/fields.rs @@ -4,16 +4,20 @@ use liquid::model::{ use liquid::{ObjectView, ValueView}; use super::document::Document; -use crate::FieldsIdsMap; +use super::{FieldMetadata, FieldsIdsMapWithMetadata}; #[derive(Debug, Clone)] pub struct Fields<'a>(Vec>); impl<'a> Fields<'a> { - pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self { + pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self { Self( std::iter::repeat(document) .zip(field_id_map.iter()) - .map(|(document, (_fid, name))| FieldValue { document, name }) + .map(|(document, (fid, name))| FieldValue { + document, + name, + metadata: field_id_map.metadata(fid).unwrap_or_default(), + }) .collect(), ) } @@ -23,6 +27,7 @@ impl<'a> Fields<'a> { pub struct FieldValue<'a> { name: &'a str, document: &'a Document<'a>, + metadata: FieldMetadata, } impl<'a> ValueView for FieldValue<'a> { @@ -74,6 +79,10 @@ impl<'a> FieldValue<'a> { self.document.get(self.name).unwrap_or(&LiquidValue::Nil) } + pub fn is_searchable(&self) -> &bool { + &self.metadata.searchable + } + pub fn is_empty(&self) -> bool { self.size() == 0 } @@ -89,12 +98,14 @@ impl<'a> ObjectView for FieldValue<'a> { } fn keys<'k>(&'k self) -> Box> + 'k> { - Box::new(["name", "value"].iter().map(|&x| KStringCow::from_static(x))) + Box::new(["name", "value", "is_searchable"].iter().map(|&x| KStringCow::from_static(x))) } fn values<'k>(&'k self) -> Box + 'k> { Box::new( - std::iter::once(self.name() as &dyn ValueView).chain(std::iter::once(self.value())), + std::iter::once(self.name() as &dyn ValueView) + .chain(std::iter::once(self.value())) + .chain(std::iter::once(self.is_searchable() as &dyn ValueView)), ) } @@ -103,13 +114,14 @@ impl<'a> ObjectView for FieldValue<'a> { } fn contains_key(&self, index: &str) -> bool { - index == "name" || index == "value" + index == "name" || index == "value" || index == "is_searchable" } fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> { match index { "name" => Some(self.name()), "value" => Some(self.value()), + "is_searchable" => Some(self.is_searchable()), _ => None, } } diff --git a/milli/src/prompt/mod.rs b/milli/src/prompt/mod.rs index 97ccbfb61..3b32b916f 100644 --- a/milli/src/prompt/mod.rs +++ b/milli/src/prompt/mod.rs @@ -4,28 +4,33 @@ pub(crate) mod error; mod fields; mod template_checker; +use std::collections::BTreeMap; use std::convert::TryFrom; +use std::num::NonZeroUsize; +use std::ops::Deref; use error::{NewPromptError, RenderPromptError}; use self::context::Context; use self::document::Document; use crate::update::del_add::DelAdd; -use crate::FieldsIdsMap; +use crate::{FieldId, FieldsIdsMap}; pub struct Prompt { template: liquid::Template, template_text: String, + max_bytes: Option, } #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct PromptData { pub template: String, + pub max_bytes: Option, } impl From for PromptData { fn from(value: Prompt) -> Self { - Self { template: value.template_text } + Self { template: value.template_text, max_bytes: value.max_bytes } } } @@ -33,14 +38,18 @@ impl TryFrom for Prompt { type Error = NewPromptError; fn try_from(value: PromptData) -> Result { - Prompt::new(value.template) + Prompt::new(value.template, value.max_bytes) } } impl Clone for Prompt { fn clone(&self) -> Self { let template_text = self.template_text.clone(); - Self { template: new_template(&template_text).unwrap(), template_text } + Self { + template: new_template(&template_text).unwrap(), + template_text, + max_bytes: self.max_bytes, + } } } @@ -53,25 +62,35 @@ fn default_template() -> liquid::Template { } fn default_template_text() -> &'static str { - "{% for field in fields %} \ + "{% for field in fields %}\ + {% if field.is_searchable and field.value != nil %}\ {{ field.name }}: {{ field.value }}\n\ + {% endif %}\ {% endfor %}" } +pub fn default_max_bytes() -> NonZeroUsize { + NonZeroUsize::new(400).unwrap() +} + impl Default for Prompt { fn default() -> Self { - Self { template: default_template(), template_text: default_template_text().into() } + Self { + template: default_template(), + template_text: default_template_text().into(), + max_bytes: Some(default_max_bytes()), + } } } impl Default for PromptData { fn default() -> Self { - Self { template: default_template_text().into() } + Self { template: default_template_text().into(), max_bytes: Some(default_max_bytes()) } } } impl Prompt { - pub fn new(template: String) -> Result { + pub fn new(template: String, max_bytes: Option) -> Result { let this = Self { template: liquid::ParserBuilder::with_stdlib() .build() @@ -79,6 +98,7 @@ impl Prompt { .parse(&template) .map_err(NewPromptError::cannot_parse_template)?, template_text: template, + max_bytes, }; // render template with special object that's OK with `doc.*` and `fields.*` @@ -93,20 +113,72 @@ impl Prompt { &self, document: obkv::KvReaderU16<'_>, side: DelAdd, - field_id_map: &FieldsIdsMap, + field_id_map: &FieldsIdsMapWithMetadata, ) -> Result { let document = Document::new(document, side, field_id_map); let context = Context::new(&document, field_id_map); - self.template.render(&context).map_err(RenderPromptError::missing_context) + let mut rendered = + self.template.render(&context).map_err(RenderPromptError::missing_context)?; + if let Some(max_bytes) = self.max_bytes { + truncate(&mut rendered, max_bytes.get()); + } + Ok(rendered) } } +fn truncate(s: &mut String, max_bytes: usize) { + if max_bytes >= s.len() { + return; + } + for i in (0..=max_bytes).rev() { + if s.is_char_boundary(i) { + s.truncate(i); + break; + } + } +} + +pub struct FieldsIdsMapWithMetadata<'a> { + fields_ids_map: &'a FieldsIdsMap, + metadata: BTreeMap, +} + +impl<'a> FieldsIdsMapWithMetadata<'a> { + pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self { + let mut metadata: BTreeMap = + fields_ids_map.ids().map(|id| (id, Default::default())).collect(); + for searchable_field_id in searchable_fields_ids { + let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue }; + metadata.searchable = true; + } + Self { fields_ids_map, metadata } + } + + pub fn metadata(&self, field_id: FieldId) -> Option { + self.metadata.get(&field_id).copied() + } +} + +impl<'a> Deref for FieldsIdsMapWithMetadata<'a> { + type Target = FieldsIdsMap; + + fn deref(&self) -> &Self::Target { + self.fields_ids_map + } +} + +#[derive(Debug, Default, Clone, Copy)] +pub struct FieldMetadata { + pub searchable: bool, +} + #[cfg(test)] mod test { use super::Prompt; use crate::error::FaultSource; use crate::prompt::error::{NewPromptError, NewPromptErrorKind}; + use crate::prompt::truncate; #[test] fn default_template() { @@ -116,18 +188,18 @@ mod test { #[test] fn empty_template() { - Prompt::new("".into()).unwrap(); + Prompt::new("".into(), None).unwrap(); } #[test] fn template_ok() { - Prompt::new("{{doc.title}}: {{doc.overview}}".into()).unwrap(); + Prompt::new("{{doc.title}}: {{doc.overview}}".into(), None).unwrap(); } #[test] fn template_syntax() { assert!(matches!( - Prompt::new("{{doc.title: {{doc.overview}}".into()), + Prompt::new("{{doc.title: {{doc.overview}}".into(), None), Err(NewPromptError { kind: NewPromptErrorKind::CannotParseTemplate(_), fault: FaultSource::User @@ -138,7 +210,7 @@ mod test { #[test] fn template_missing_doc() { assert!(matches!( - Prompt::new("{{title}}: {{overview}}".into()), + Prompt::new("{{title}}: {{overview}}".into(), None), Err(NewPromptError { kind: NewPromptErrorKind::InvalidFieldsInTemplate(_), fault: FaultSource::User @@ -148,29 +220,62 @@ mod test { #[test] fn template_nested_doc() { - Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into()).unwrap(); + Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into(), None).unwrap(); } #[test] fn template_fields() { - Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into()).unwrap(); + Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into(), None).unwrap(); } #[test] fn template_fields_ok() { - Prompt::new("{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into()) - .unwrap(); + Prompt::new( + "{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into(), + None, + ) + .unwrap(); } #[test] fn template_fields_invalid() { assert!(matches!( // intentionally garbled field - Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into()), + Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into(), None), Err(NewPromptError { kind: NewPromptErrorKind::InvalidFieldsInTemplate(_), fault: FaultSource::User }) )); } + + // todo: test truncation + #[test] + fn template_truncation() { + let mut s = "インテル ザー ビーグル".to_string(); + + truncate(&mut s, 42); + assert_eq!(s, "インテル ザー ビーグル"); + + assert_eq!(s.len(), 32); + truncate(&mut s, 32); + assert_eq!(s, "インテル ザー ビーグル"); + + truncate(&mut s, 31); + assert_eq!(s, "インテル ザー ビーグ"); + truncate(&mut s, 30); + assert_eq!(s, "インテル ザー ビーグ"); + truncate(&mut s, 28); + assert_eq!(s, "インテル ザー ビー"); + truncate(&mut s, 26); + assert_eq!(s, "インテル ザー ビー"); + truncate(&mut s, 25); + assert_eq!(s, "インテル ザー ビ"); + + assert_eq!("イ".len(), 3); + truncate(&mut s, 3); + assert_eq!(s, "イ"); + truncate(&mut s, 2); + assert_eq!(s, ""); + } } diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 62ae05740..a63bb634b 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -1,4 +1,5 @@ use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::Display; use std::ops::ControlFlow; use std::{fmt, mem}; @@ -37,6 +38,15 @@ pub enum OrderBy { Count, } +impl Display for OrderBy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + OrderBy::Lexicographic => f.write_str("alphabetically"), + OrderBy::Count => f.write_str("by count"), + } + } +} + pub struct FacetDistribution<'a> { facets: Option>, candidates: Option, @@ -100,7 +110,6 @@ impl<'a> FacetDistribution<'a> { let mut lexicographic_distribution = BTreeMap::new(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); - let distribution_prelength = distribution.len(); let db = self.index.field_id_docid_facet_f64s; for docid in candidates { key_buffer.truncate(mem::size_of::()); @@ -113,23 +122,21 @@ impl<'a> FacetDistribution<'a> { for result in iter { let ((_, _, value), ()) = result?; *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1; - - if lexicographic_distribution.len() - distribution_prelength - == self.max_values_per_facet - { - break; - } } } - distribution.extend(lexicographic_distribution); + distribution.extend( + lexicographic_distribution + .into_iter() + .take(self.max_values_per_facet.saturating_sub(distribution.len())), + ); } FacetType::String => { let mut normalized_distribution = BTreeMap::new(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); let db = self.index.field_id_docid_facet_strings; - 'outer: for docid in candidates { + for docid in candidates { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db @@ -144,14 +151,14 @@ impl<'a> FacetDistribution<'a> { .or_insert_with(|| (original_value, 0)); *count += 1; - if normalized_distribution.len() == self.max_values_per_facet { - break 'outer; - } + // we'd like to break here if we have enough facet values, but we are collecting them by increasing docid, + // so higher ranked facets could be in later docids } } let iter = normalized_distribution .into_iter() + .take(self.max_values_per_facet.saturating_sub(distribution.len())) .map(|(_normalized, (original, count))| (original.to_string(), count)); distribution.extend(iter); } @@ -467,7 +474,7 @@ mod tests { .execute() .unwrap(); - milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###); + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###); let map = FacetDistribution::new(&txn, &index) .facets(iter::once(("colour", OrderBy::Count))) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 9ce201aca..c059d2d27 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -12,7 +12,7 @@ use serde_json::Value; use super::facet_range_search; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ - FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ @@ -336,6 +336,24 @@ impl<'a> Filter<'a> { return Ok(docids); } + Condition::StartsWith { keyword: _, word } => { + let value = crate::normalize_facet(word.value()); + let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() }; + let docids = strings_db + .prefix_iter(rtxn, &base)? + .map(|result| -> Result { + match result { + Ok((_facet_group_key, FacetGroupValue { bitmap, .. })) => Ok(bitmap), + Err(_e) => Err(InternalError::from(SerializationError::Decoding { + db_name: Some(FACET_ID_STRING_DOCIDS), + }) + .into()), + } + }) + .union()?; + + return Ok(docids); + } }; let mut output = RoaringBitmap::new(); diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index e08111473..8b274804c 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -190,7 +190,7 @@ impl<'a> Search<'a> { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; // no embedder, no semantic search - let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { + let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; @@ -212,7 +212,7 @@ impl<'a> Search<'a> { }; search.semantic = - Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder }); + Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized }); // TODO: would be better to have two distinct functions at this point let vector_results = search.execute()?; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 3057066d2..d5b05f515 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -32,6 +32,7 @@ pub struct SemanticSearch { vector: Option>, embedder_name: String, embedder: Arc, + quantized: bool, } pub struct Search<'a> { @@ -89,9 +90,10 @@ impl<'a> Search<'a> { &mut self, embedder_name: String, embedder: Arc, + quantized: bool, vector: Option>, ) -> &mut Search<'a> { - self.semantic = Some(SemanticSearch { embedder_name, embedder, vector }); + self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector }); self } @@ -206,7 +208,7 @@ impl<'a> Search<'a> { degraded, used_negative_operator, } = match self.semantic.as_ref() { - Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => { + Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => { execute_vector_search( &mut ctx, vector, @@ -219,6 +221,7 @@ impl<'a> Search<'a> { self.limit, embedder_name, embedder, + *quantized, self.time_budget.clone(), self.ranking_score_threshold, )? diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index b30306a0b..f7c590360 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -312,6 +312,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( Ok(ranking_rules) } +#[allow(clippy::too_many_arguments)] fn get_ranking_rules_for_vector<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, @@ -320,6 +321,7 @@ fn get_ranking_rules_for_vector<'ctx>( target: &[f32], embedder_name: &str, embedder: &Embedder, + quantized: bool, ) -> Result>> { // query graph search @@ -347,6 +349,7 @@ fn get_ranking_rules_for_vector<'ctx>( limit_plus_offset, embedder_name, embedder, + quantized, )?; ranking_rules.push(Box::new(vector_sort)); vector = true; @@ -576,6 +579,7 @@ pub fn execute_vector_search( length: usize, embedder_name: &str, embedder: &Embedder, + quantized: bool, time_budget: TimeBudget, ranking_score_threshold: Option, ) -> Result { @@ -591,6 +595,7 @@ pub fn execute_vector_search( vector, embedder_name, embedder, + quantized, )?; let mut placeholder_search_logger = logger::DefaultSearchLogger; diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index e56f3cbbe..de1dacbe7 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -16,6 +16,7 @@ pub struct VectorSort { limit: usize, distribution_shift: Option, embedder_index: u8, + quantized: bool, } impl VectorSort { @@ -26,6 +27,7 @@ impl VectorSort { limit: usize, embedder_name: &str, embedder: &Embedder, + quantized: bool, ) -> Result { let embedder_index = ctx .index @@ -41,6 +43,7 @@ impl VectorSort { limit, distribution_shift: embedder.distribution(), embedder_index, + quantized, }) } @@ -49,16 +52,12 @@ impl VectorSort { ctx: &mut SearchContext<'_>, vector_candidates: &RoaringBitmap, ) -> Result<()> { - let readers: std::result::Result, _> = - ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect(); - let readers = readers?; - let target = &self.target; let mut results = Vec::new(); - for reader in readers.iter() { + for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) { let nns_by_vector = - reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?; + reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; results.extend(nns_by_vector.into_iter()); } results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index bf5cc323f..0cb8d723d 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -18,9 +18,11 @@ pub struct Similar<'a> { embedder_name: String, embedder: Arc, ranking_score_threshold: Option, + quantized: bool, } impl<'a> Similar<'a> { + #[allow(clippy::too_many_arguments)] pub fn new( id: DocumentId, offset: usize, @@ -29,6 +31,7 @@ impl<'a> Similar<'a> { rtxn: &'a heed::RoTxn<'a>, embedder_name: String, embedder: Arc, + quantized: bool, ) -> Self { Self { id, @@ -40,6 +43,7 @@ impl<'a> Similar<'a> { embedder_name, embedder, ranking_score_threshold: None, + quantized, } } @@ -67,19 +71,13 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? .ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let readers: std::result::Result, _> = - self.index.arroy_readers(self.rtxn, embedder_index).collect(); - - let readers = readers?; - let mut results = Vec::new(); - for reader in readers.iter() { - let nns_by_item = reader.nns_by_item( + for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) { + let nns_by_item = reader?.nns_by_item( self.rtxn, self.id, self.limit + self.offset + 1, - None, Some(&universe), )?; if let Some(mut nns_by_item) = nns_by_item { diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index f66c3fd46..38a4ebe8a 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -15,14 +15,14 @@ use serde_json::Value; use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; use crate::error::FaultSource; use crate::index::IndexEmbeddingConfig; -use crate::prompt::Prompt; +use crate::prompt::{FieldsIdsMapWithMetadata, Prompt}; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; -use crate::vector::settings::{EmbedderAction, ReindexAction}; +use crate::vector::settings::ReindexAction; use crate::vector::{Embedder, Embeddings}; -use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort}; +use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; /// The length of the elements that are always in the buffer when inserting new values. const TRUNCATE_SIZE: usize = size_of::(); @@ -189,7 +189,13 @@ pub fn extract_vector_points( let reindex_vectors = settings_diff.reindex_vectors(); let old_fields_ids_map = &settings_diff.old.fields_ids_map; + let old_fields_ids_map = + FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids); + let new_fields_ids_map = &settings_diff.new.fields_ids_map; + let new_fields_ids_map = + FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids); + // the vector field id may have changed let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); @@ -202,65 +208,65 @@ pub fn extract_vector_points( if reindex_vectors { for (name, action) in settings_diff.embedding_config_updates.iter() { - match action { - EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted - EmbedderAction::Reindex(action) => { - let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name) - else { - tracing::error!(embedder = name, "Requested embedder config not found"); - continue; - }; + if let Some(action) = action.reindex() { + let Some((embedder_name, (embedder, prompt, _quantized))) = + configs.remove_entry(name) + else { + tracing::error!(embedder = name, "Requested embedder config not found"); + continue; + }; - // (docid, _index) -> KvWriterDelAdd -> Vector - let manual_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid, _index) -> KvWriterDelAdd -> Vector + let manual_vectors_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - // (docid) -> (prompt) - let prompts_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid) -> (prompt) + let prompts_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - // (docid) -> () - let remove_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid) -> () + let remove_vectors_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - let action = match action { - ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, - ReindexAction::RegeneratePrompts => { - let Some((_, old_prompt)) = old_configs.get(name) else { - tracing::error!(embedder = name, "Old embedder config not found"); - continue; - }; + let action = match action { + ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, + ReindexAction::RegeneratePrompts => { + let Some((_, old_prompt, _quantized)) = old_configs.get(name) else { + tracing::error!(embedder = name, "Old embedder config not found"); + continue; + }; - ExtractionAction::SettingsRegeneratePrompts { old_prompt } - } - }; + ExtractionAction::SettingsRegeneratePrompts { old_prompt } + } + }; - extractors.push(EmbedderVectorExtractor { - embedder_name, - embedder, - prompt, - prompts_writer, - remove_vectors_writer, - manual_vectors_writer, - add_to_user_provided: RoaringBitmap::new(), - action, - }); - } + extractors.push(EmbedderVectorExtractor { + embedder_name, + embedder, + prompt, + prompts_writer, + remove_vectors_writer, + manual_vectors_writer, + add_to_user_provided: RoaringBitmap::new(), + action, + }); + } else { + continue; } } } else { // document operation - for (embedder_name, (embedder, prompt)) in configs.into_iter() { + for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() { // (docid, _index) -> KvWriterDelAdd -> Vector let manual_vectors_writer = create_writer( indexer.chunk_compression_type, @@ -376,7 +382,7 @@ pub fn extract_vector_points( ); continue; } - regenerate_prompt(obkv, prompt, new_fields_ids_map)? + regenerate_prompt(obkv, prompt, &new_fields_ids_map)? } }, // prompt regeneration is only triggered for existing embedders @@ -393,7 +399,7 @@ pub fn extract_vector_points( regenerate_if_prompt_changed( obkv, (old_prompt, prompt), - (old_fields_ids_map, new_fields_ids_map), + (&old_fields_ids_map, &new_fields_ids_map), )? } else { // we can simply ignore user provided vectors as they are not regenerated and are @@ -409,7 +415,7 @@ pub fn extract_vector_points( prompt, (add_to_user_provided, remove_from_user_provided), (old, new), - (old_fields_ids_map, new_fields_ids_map), + (&old_fields_ids_map, &new_fields_ids_map), document_id, embedder_name, embedder_is_manual, @@ -479,7 +485,10 @@ fn extract_vector_document_diff( prompt: &Prompt, (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap), (old, new): (VectorState, VectorState), - (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), + (old_fields_ids_map, new_fields_ids_map): ( + &FieldsIdsMapWithMetadata, + &FieldsIdsMapWithMetadata, + ), document_id: impl Fn() -> Value, embedder_name: &str, embedder_is_manual: bool, @@ -599,7 +608,10 @@ fn extract_vector_document_diff( fn regenerate_if_prompt_changed( obkv: obkv::KvReader<'_, FieldId>, (old_prompt, new_prompt): (&Prompt, &Prompt), - (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), + (old_fields_ids_map, new_fields_ids_map): ( + &FieldsIdsMapWithMetadata, + &FieldsIdsMapWithMetadata, + ), ) -> Result { let old_prompt = old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default()); @@ -614,7 +626,7 @@ fn regenerate_if_prompt_changed( fn regenerate_prompt( obkv: obkv::KvReader<'_, FieldId>, prompt: &Prompt, - new_fields_ids_map: &FieldsIdsMap, + new_fields_ids_map: &FieldsIdsMapWithMetadata, ) -> Result { let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 87c6bc6db..326dd842d 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs; use crate::update::{ IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, }; -use crate::vector::EmbeddingConfigs; +use crate::vector::{ArroyWrapper, EmbeddingConfigs}; use crate::{CboRoaringBitmapCodec, Index, Object, Result}; static MERGED_DATABASE_COUNT: usize = 7; @@ -679,6 +679,24 @@ where let number_of_documents = self.index.number_of_documents(self.wtxn)?; let mut rng = rand::rngs::StdRng::seed_from_u64(42); + // If an embedder wasn't used in the typedchunk but must be binary quantized + // we should insert it in `dimension` + for (name, action) in settings_diff.embedding_config_updates.iter() { + if action.is_being_quantized && !dimension.contains_key(name.as_str()) { + let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or( + InternalError::DatabaseMissingEntry { + db_name: "embedder_category_id", + key: None, + }, + )?; + let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap(); + let reader = + ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized); + let dim = reader.dimensions(self.wtxn)?; + dimension.insert(name.to_string(), dim); + } + } + for (embedder_name, dimension) in dimension { let wtxn = &mut *self.wtxn; let vector_arroy = self.index.vector_arroy; @@ -686,13 +704,23 @@ where let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; + let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name); + let was_quantized = settings_diff + .old + .embedding_configs + .get(&embedder_name) + .map_or(false, |conf| conf.2); + let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized); pool.install(|| { for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { - let writer = arroy::Writer::new(vector_arroy, k, dimension); - if writer.need_build(wtxn)? { - writer.build(wtxn, &mut rng, None)?; - } else if writer.is_empty(wtxn)? { + let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized); + if is_quantizing { + writer.quantize(wtxn, k, dimension)?; + } + if writer.need_build(wtxn, dimension)? { + writer.build(wtxn, &mut rng, dimension)?; + } else if writer.is_empty(wtxn, dimension)? { break; } } @@ -2740,11 +2768,13 @@ mod tests { api_key: Setting::NotSet, dimensions: Setting::Set(3), document_template: Setting::NotSet, + document_template_max_bytes: Setting::NotSet, url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, distribution: Setting::NotSet, headers: Setting::NotSet, + binary_quantized: Setting::NotSet, }), ); settings.set_embedder_settings(embedders); @@ -2773,7 +2803,7 @@ mod tests { std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap()); let res = index .search(&rtxn) - .semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec())) + .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec())) .execute() .unwrap(); assert_eq!(res.documents_ids.len(), 3); diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 73fa3ca7b..bb2cfe56c 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -28,7 +28,8 @@ use crate::update::index_documents::GrenadParameters; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; -use crate::vector::settings::{EmbedderAction, WriteBackToDocuments}; +use crate::vector::settings::WriteBackToDocuments; +use crate::vector::ArroyWrapper; use crate::{ is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, }; @@ -989,19 +990,17 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: Result< - BTreeMap<&str, (Vec>, &RoaringBitmap)>, - > = settings_diff + let readers: Result, &RoaringBitmap)>> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { - if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }) = action + if let Some(WriteBackToDocuments { embedder_id, user_provided }) = + action.write_back() { - let readers: Result> = - self.index.arroy_readers(wtxn, *embedder_id).collect(); + let readers: Result> = self + .index + .arroy_readers(wtxn, *embedder_id, action.was_quantized) + .collect(); match readers { Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), Err(error) => Some(Err(error)), @@ -1104,23 +1103,14 @@ impl<'a, 'i> Transform<'a, 'i> { } } - let mut writers = Vec::new(); - // delete all vectors from the embedders that need removal for (_, (readers, _)) in readers { for reader in readers { - let dimensions = reader.dimensions(); - let arroy_index = reader.index(); - drop(reader); - let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions); - writers.push(writer); + let dimensions = reader.dimensions(wtxn)?; + reader.clear(wtxn, dimensions)?; } } - for writer in writers { - writer.clear(wtxn)?; - } - let grenad_params = GrenadParameters { chunk_compression_type: self.indexer_settings.chunk_compression_type, chunk_compression_level: self.indexer_settings.chunk_compression_level, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 9de95778b..97a4bf712 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -27,6 +27,7 @@ use crate::update::index_documents::helpers::{ as_cloneable_grenad, keep_latest_obkv, try_split_array_at, }; use crate::update::settings::InnerIndexSettingsDiff; +use crate::vector::ArroyWrapper; use crate::{ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError, U8StrStrCodec, @@ -666,9 +667,14 @@ pub(crate) fn write_typed_chunk_into_index( let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; + let binary_quantized = settings_diff + .old + .embedding_configs + .get(&embedder_name) + .map_or(false, |conf| conf.2); // FIXME: allow customizing distance let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) - .map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension)) + .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized)) .collect(); // remove vectors for docids we want them removed @@ -679,7 +685,7 @@ pub(crate) fn write_typed_chunk_into_index( for writer in &writers { // Uses invariant: vectors are packed in the first writers. - if !writer.del_item(wtxn, docid)? { + if !writer.del_item(wtxn, expected_dimension, docid)? { break; } } @@ -711,7 +717,7 @@ pub(crate) fn write_typed_chunk_into_index( ))); } for (embedding, writer) in embeddings.iter().zip(&writers) { - writer.add_item(wtxn, docid, embedding)?; + writer.add_item(wtxn, expected_dimension, docid, embedding)?; } } @@ -734,7 +740,7 @@ pub(crate) fn write_typed_chunk_into_index( break; }; if candidate == vector { - writer.del_item(wtxn, docid)?; + writer.del_item(wtxn, expected_dimension, docid)?; deleted_index = Some(index); } } @@ -751,8 +757,13 @@ pub(crate) fn write_typed_chunk_into_index( if let Some((last_index, vector)) = last_index_with_a_vector { // unwrap: computed the index from the list of writers let writer = writers.get(last_index).unwrap(); - writer.del_item(wtxn, docid)?; - writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?; + writer.del_item(wtxn, expected_dimension, docid)?; + writers.get(deleted_index).unwrap().add_item( + wtxn, + expected_dimension, + docid, + &vector, + )?; } } } @@ -762,8 +773,8 @@ pub(crate) fn write_typed_chunk_into_index( // overflow was detected during vector extraction. for writer in &writers { - if !writer.contains_item(wtxn, docid)? { - writer.add_item(wtxn, docid, &vector)?; + if !writer.contains_item(wtxn, expected_dimension, docid)? { + writer.add_item(wtxn, expected_dimension, docid, &vector)?; break; } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 9799fc6ec..6e2b53d58 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1,5 +1,6 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use std::convert::TryInto; +use std::num::NonZeroUsize; use std::result::Result as StdResult; use std::sync::Arc; @@ -19,6 +20,7 @@ use crate::index::{ IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, }; use crate::order_by_map::OrderByMap; +use crate::prompt::default_max_bytes; use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; @@ -952,7 +954,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let old_configs = self.index.embedding_configs(self.wtxn)?; let remove_all: Result> = old_configs .into_iter() - .map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> { + .map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> { let embedder_id = self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or( crate::InternalError::DatabaseMissingEntry { @@ -962,10 +964,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { )?; Ok(( name, - EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }), + EmbedderAction::with_write_back( + WriteBackToDocuments { embedder_id, user_provided }, + config.quantized(), + ), )) }) .collect(); @@ -1002,7 +1004,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { match joined { // updated config EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => { - let settings_diff = SettingsDiff::from_settings(old, new); + let was_quantized = old.binary_quantized.set().unwrap_or_default(); + let settings_diff = SettingsDiff::from_settings(&name, old, new)?; match settings_diff { SettingsDiff::Remove => { tracing::debug!( @@ -1021,25 +1024,29 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.index.embedder_category_id.delete(self.wtxn, &name)?; embedder_actions.insert( name, - EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }), + EmbedderAction::with_write_back( + WriteBackToDocuments { embedder_id, user_provided }, + was_quantized, + ), ); } - SettingsDiff::Reindex { action, updated_settings } => { + SettingsDiff::Reindex { action, updated_settings, quantize } => { tracing::debug!( embedder = name, user_provided = user_provided.len(), ?action, "reindex embedder" ); - embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action)); + embedder_actions.insert( + name.clone(), + EmbedderAction::with_reindex(action, was_quantized) + .with_is_being_quantized(quantize), + ); let new = validate_embedding_settings(Setting::Set(updated_settings), &name)?; updated_configs.insert(name, (new, user_provided)); } - SettingsDiff::UpdateWithoutReindex { updated_settings } => { + SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => { tracing::debug!( embedder = name, user_provided = user_provided.len(), @@ -1047,6 +1054,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { ); let new = validate_embedding_settings(Setting::Set(updated_settings), &name)?; + if quantize { + embedder_actions.insert( + name.clone(), + EmbedderAction::default().with_is_being_quantized(true), + ); + } updated_configs.insert(name, (new, user_provided)); } } @@ -1065,8 +1078,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { &mut setting, ); let setting = validate_embedding_settings(setting, &name)?; - embedder_actions - .insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex)); + embedder_actions.insert( + name.clone(), + EmbedderAction::with_reindex(ReindexAction::FullReindex, false), + ); updated_configs.insert(name, (setting, RoaringBitmap::new())); } } @@ -1080,19 +1095,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let mut find_free_index = move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); for (name, action) in embedder_actions.iter() { - match action { - EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { - /* cannot be a new embedder, so has to have an id already */ - } - EmbedderAction::Reindex(ReindexAction::FullReindex) => { - if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() { - let id = find_free_index() - .ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; - tracing::debug!(embedder = name, id, "assigning free id to new embedder"); - self.index.embedder_category_id.put(self.wtxn, name, &id)?; - } - } - EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ } + // ignore actions that are not possible for a new embedder + if matches!(action.reindex(), Some(ReindexAction::FullReindex)) + && self.index.embedder_category_id.get(self.wtxn, name)?.is_none() + { + let id = + find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; + tracing::debug!(embedder = name, id, "assigning free id to new embedder"); + self.index.embedder_category_id.put(self.wtxn, name, &id)?; } } let updated_configs: Vec = updated_configs @@ -1238,7 +1248,7 @@ impl InnerIndexSettingsDiff { old_settings: InnerIndexSettings, new_settings: InnerIndexSettings, primary_key_id: Option, - embedding_config_updates: BTreeMap, + mut embedding_config_updates: BTreeMap, settings_update_only: bool, ) -> Self { let only_additional_fields = match ( @@ -1273,6 +1283,39 @@ impl InnerIndexSettingsDiff { let cache_user_defined_searchables = old_settings.user_defined_searchable_fields != new_settings.user_defined_searchable_fields; + // if the user-defined searchables changed, then we need to reindex prompts. + if cache_user_defined_searchables { + for (embedder_name, (config, _, _quantized)) in + new_settings.embedding_configs.inner_as_ref() + { + let was_quantized = + old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2); + // skip embedders that don't use document templates + if !config.uses_document_template() { + continue; + } + + // note: this could currently be entry.or_insert(..), but we're future-proofing with an explicit match + // this always makes the code clearer by explicitly handling the cases + match embedding_config_updates.entry(embedder_name.clone()) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(EmbedderAction::with_reindex( + ReindexAction::RegeneratePrompts, + was_quantized, + )); + } + std::collections::btree_map::Entry::Occupied(entry) => { + let EmbedderAction { + was_quantized: _, + is_being_quantized: _, + write_back: _, // We are deleting this embedder, so no point in regeneration + reindex: _, // We are already fully reindexing + } = entry.get(); + } + }; + } + } + InnerIndexSettingsDiff { old: old_settings, new: new_settings, @@ -1518,7 +1561,7 @@ fn embedders(embedding_configs: Vec) -> Result) -> Result { + let max_bytes = match document_template_max_bytes.set() { + Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| { + crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes { + embedder_name: name.to_owned(), + } + })?, + None => default_max_bytes(), + }; + // validate - let template = crate::prompt::Prompt::new(template) - .map(|prompt| crate::prompt::PromptData::from(prompt).template) - .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?; + let template = crate::prompt::Prompt::new( + template, + // always specify a max_bytes + Some(max_bytes), + ) + .map(|prompt| crate::prompt::PromptData::from(prompt).template) + .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?; Ok(Setting::Set(EmbeddingSettings { source, @@ -1565,11 +1623,13 @@ fn validate_prompt( api_key, dimensions, document_template: Setting::Set(template), + document_template_max_bytes, url, request, response, distribution, headers, + binary_quantized: binary_quantize, })) } new => Ok(new), @@ -1589,11 +1649,13 @@ pub fn validate_embedding_settings( api_key, dimensions, document_template, + document_template_max_bytes, url, request, response, distribution, headers, + binary_quantized: binary_quantize, } = settings; if let Some(0) = dimensions.set() { @@ -1628,11 +1690,13 @@ pub fn validate_embedding_settings( api_key, dimensions, document_template, + document_template_max_bytes, url, request, response, distribution, headers, + binary_quantized: binary_quantize, })); }; match inferred_source { @@ -1700,6 +1764,12 @@ pub fn validate_embedding_settings( inferred_source, name, )?; + check_unset( + &document_template_max_bytes, + EmbeddingSettings::DOCUMENT_TEMPLATE_MAX_BYTES, + inferred_source, + name, + )?; check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; @@ -1722,11 +1792,13 @@ pub fn validate_embedding_settings( api_key, dimensions, document_template, + document_template_max_bytes, url, request, response, distribution, headers, + binary_quantized: binary_quantize, })) } diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index caccb404b..d52e68bbe 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -1,8 +1,12 @@ use std::collections::HashMap; use std::sync::Arc; +use arroy::distances::{Angular, BinaryQuantizedAngular}; +use arroy::ItemId; use deserr::{DeserializeError, Deserr}; +use heed::{RoTxn, RwTxn, Unspecified}; use ordered_float::OrderedFloat; +use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use self::error::{EmbedError, NewEmbedderError}; @@ -26,6 +30,171 @@ pub type Embedding = Vec; pub const REQUEST_PARALLELISM: usize = 40; +pub struct ArroyWrapper { + quantized: bool, + index: u16, + database: arroy::Database, +} + +impl ArroyWrapper { + pub fn new(database: arroy::Database, index: u16, quantized: bool) -> Self { + Self { database, index, quantized } + } + + pub fn index(&self) -> u16 { + self.index + } + + pub fn dimensions(&self, rtxn: &RoTxn) -> Result { + if self.quantized { + Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions()) + } else { + Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions()) + } + } + + pub fn quantize( + &mut self, + wtxn: &mut RwTxn, + index: u16, + dimension: usize, + ) -> Result<(), arroy::Error> { + if !self.quantized { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + writer.prepare_changing_distance::(wtxn)?; + self.quantized = true; + } + Ok(()) + } + + pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn) + } + } + + pub fn build( + &self, + wtxn: &mut RwTxn, + rng: &mut R, + dimension: usize, + ) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None) + } + } + + pub fn add_item( + &self, + wtxn: &mut RwTxn, + dimension: usize, + item_id: arroy::ItemId, + vector: &[f32], + ) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension) + .add_item(wtxn, item_id, vector) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension) + .add_item(wtxn, item_id, vector) + } + } + + pub fn del_item( + &self, + wtxn: &mut RwTxn, + dimension: usize, + item_id: arroy::ItemId, + ) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id) + } + } + + pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn) + } + } + + pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn) + } + } + + pub fn contains_item( + &self, + rtxn: &RoTxn, + dimension: usize, + item: arroy::ItemId, + ) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, item) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item) + } + } + + pub fn nns_by_item( + &self, + rtxn: &RoTxn, + item: ItemId, + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result>, arroy::Error> { + if self.quantized { + arroy::Reader::open(rtxn, self.index, self.quantized_db())? + .nns_by_item(rtxn, item, limit, None, None, filter) + } else { + arroy::Reader::open(rtxn, self.index, self.angular_db())? + .nns_by_item(rtxn, item, limit, None, None, filter) + } + } + + pub fn nns_by_vector( + &self, + txn: &RoTxn, + item: &[f32], + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result, arroy::Error> { + if self.quantized { + arroy::Reader::open(txn, self.index, self.quantized_db())? + .nns_by_vector(txn, item, limit, None, None, filter) + } else { + arroy::Reader::open(txn, self.index, self.angular_db())? + .nns_by_vector(txn, item, limit, None, None, filter) + } + } + + pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result>, arroy::Error> { + if self.quantized { + arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid) + } else { + arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid) + } + } + + fn angular_db(&self) -> arroy::Database { + self.database.remap_data_type() + } + + fn quantized_db(&self) -> arroy::Database { + self.database.remap_data_type() + } +} + /// One or multiple embeddings stored consecutively in a flat vector. pub struct Embeddings { data: Vec, @@ -124,62 +293,48 @@ pub struct EmbeddingConfig { pub embedder_options: EmbedderOptions, /// Document template pub prompt: PromptData, + /// If this embedder is binary quantized + pub quantized: Option, // TODO: add metrics and anything needed } +impl EmbeddingConfig { + pub fn quantized(&self) -> bool { + self.quantized.unwrap_or_default() + } +} + /// Map of embedder configurations. /// /// Each configuration is mapped to a name. #[derive(Clone, Default)] -pub struct EmbeddingConfigs(HashMap, Arc)>); +pub struct EmbeddingConfigs(HashMap, Arc, bool)>); impl EmbeddingConfigs { /// Create the map from its internal component.s - pub fn new(data: HashMap, Arc)>) -> Self { + pub fn new(data: HashMap, Arc, bool)>) -> Self { Self(data) } /// Get an embedder configuration and template from its name. - pub fn get(&self, name: &str) -> Option<(Arc, Arc)> { + pub fn get(&self, name: &str) -> Option<(Arc, Arc, bool)> { self.0.get(name).cloned() } - /// Get the default embedder configuration, if any. - pub fn get_default(&self) -> Option<(Arc, Arc)> { - self.get(self.get_default_embedder_name()) - } - - pub fn inner_as_ref(&self) -> &HashMap, Arc)> { + pub fn inner_as_ref(&self) -> &HashMap, Arc, bool)> { &self.0 } - pub fn into_inner(self) -> HashMap, Arc)> { + pub fn into_inner(self) -> HashMap, Arc, bool)> { self.0 } - - /// Get the name of the default embedder configuration. - /// - /// The default embedder is determined as follows: - /// - /// - If there is only one embedder, it is always the default. - /// - If there are multiple embedders and one of them is called `default`, then that one is the default embedder. - /// - In all other cases, there is no default embedder. - pub fn get_default_embedder_name(&self) -> &str { - let mut it = self.0.keys(); - let first_name = it.next(); - let second_name = it.next(); - match (first_name, second_name) { - (None, _) => "default", - (Some(first), None) => first, - (Some(_), Some(_)) => "default", - } - } } impl IntoIterator for EmbeddingConfigs { - type Item = (String, (Arc, Arc)); + type Item = (String, (Arc, Arc, bool)); - type IntoIter = std::collections::hash_map::IntoIter, Arc)>; + type IntoIter = + std::collections::hash_map::IntoIter, Arc, bool)>; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() @@ -305,6 +460,16 @@ impl Embedder { Embedder::Rest(embedder) => embedder.distribution(), } } + + pub fn uses_document_template(&self) -> bool { + match self { + Embedder::HuggingFace(_) + | Embedder::OpenAi(_) + | Embedder::Ollama(_) + | Embedder::Rest(_) => true, + Embedder::UserProvided(_) => false, + } + } } /// Describes the mean and sigma of distribution of embedding similarity in the embedding space. diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index cef45f90e..152d1fb7a 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -66,11 +66,11 @@ pub enum EmbeddingModel { // # WARNING // // If ever adding a model, make sure to add it to the list of supported models below. - #[default] #[serde(rename = "text-embedding-ada-002")] #[deserr(rename = "text-embedding-ada-002")] TextEmbeddingAda002, + #[default] #[serde(rename = "text-embedding-3-small")] #[deserr(rename = "text-embedding-3-small")] TextEmbedding3Small, diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 3cb90cbdb..3bb7f09e6 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -1,11 +1,12 @@ use std::collections::BTreeMap; +use std::num::NonZeroUsize; use deserr::Deserr; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use super::{ollama, openai, DistributionShift}; -use crate::prompt::PromptData; +use crate::prompt::{default_max_bytes, PromptData}; use crate::update::Setting; use crate::vector::EmbeddingConfig; use crate::UserError; @@ -31,9 +32,15 @@ pub struct EmbeddingSettings { pub dimensions: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + pub binary_quantized: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] pub document_template: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + pub document_template_max_bytes: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] pub url: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] @@ -81,23 +88,63 @@ pub enum ReindexAction { pub enum SettingsDiff { Remove, - Reindex { action: ReindexAction, updated_settings: EmbeddingSettings }, - UpdateWithoutReindex { updated_settings: EmbeddingSettings }, + Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool }, + UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool }, } -pub enum EmbedderAction { - WriteBackToDocuments(WriteBackToDocuments), - Reindex(ReindexAction), +#[derive(Default, Debug)] +pub struct EmbedderAction { + pub was_quantized: bool, + pub is_being_quantized: bool, + pub write_back: Option, + pub reindex: Option, } +impl EmbedderAction { + pub fn is_being_quantized(&self) -> bool { + self.is_being_quantized + } + + pub fn write_back(&self) -> Option<&WriteBackToDocuments> { + self.write_back.as_ref() + } + + pub fn reindex(&self) -> Option<&ReindexAction> { + self.reindex.as_ref() + } + + pub fn with_is_being_quantized(mut self, quantize: bool) -> Self { + self.is_being_quantized = quantize; + self + } + + pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self { + Self { + was_quantized, + is_being_quantized: false, + write_back: Some(write_back), + reindex: None, + } + } + + pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self { + Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) } + } +} + +#[derive(Debug)] pub struct WriteBackToDocuments { pub embedder_id: u8, pub user_provided: RoaringBitmap, } impl SettingsDiff { - pub fn from_settings(old: EmbeddingSettings, new: Setting) -> Self { - match new { + pub fn from_settings( + embedder_name: &str, + old: EmbeddingSettings, + new: Setting, + ) -> Result { + let ret = match new { Setting::Set(new) => { let EmbeddingSettings { mut source, @@ -111,6 +158,8 @@ impl SettingsDiff { mut response, mut distribution, mut headers, + mut document_template_max_bytes, + binary_quantized: mut binary_quantize, } = old; let EmbeddingSettings { @@ -125,8 +174,18 @@ impl SettingsDiff { response: new_response, distribution: new_distribution, headers: new_headers, + document_template_max_bytes: new_document_template_max_bytes, + binary_quantized: new_binary_quantize, } = new; + if matches!(binary_quantize, Setting::Set(true)) + && matches!(new_binary_quantize, Setting::Set(false)) + { + return Err(UserError::InvalidDisableBinaryQuantization { + embedder_name: embedder_name.to_string(), + }); + } + let mut reindex_action = None; // **Warning**: do not use short-circuiting || here, we want all these operations applied @@ -142,6 +201,7 @@ impl SettingsDiff { &mut request, &mut response, &mut document_template, + &mut document_template_max_bytes, &mut headers, ) } @@ -165,6 +225,7 @@ impl SettingsDiff { _ => {} } } + let binary_quantize_changed = binary_quantize.apply(new_binary_quantize); if url.apply(new_url) { match source { // do not regenerate on an url change in OpenAI @@ -190,6 +251,23 @@ impl SettingsDiff { ); } + if document_template_max_bytes.apply(new_document_template_max_bytes) { + let previous_document_template_max_bytes = + document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); + let new_document_template_max_bytes = + new_document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); + + // only reindex if the size increased. Reasoning: + // - size decrease is a performance optimization, so we don't reindex and we keep the more accurate vectors + // - size increase is an accuracy optimization, so we want to reindex + if new_document_template_max_bytes > previous_document_template_max_bytes { + ReindexAction::push_action( + &mut reindex_action, + ReindexAction::RegeneratePrompts, + ) + } + } + distribution.apply(new_distribution); api_key.apply(new_api_key); headers.apply(new_headers); @@ -206,16 +284,28 @@ impl SettingsDiff { response, distribution, headers, + document_template_max_bytes, + binary_quantized: binary_quantize, }; match reindex_action { - Some(action) => Self::Reindex { action, updated_settings }, - None => Self::UpdateWithoutReindex { updated_settings }, + Some(action) => Self::Reindex { + action, + updated_settings, + quantize: binary_quantize_changed, + }, + None => Self::UpdateWithoutReindex { + updated_settings, + quantize: binary_quantize_changed, + }, } } Setting::Reset => Self::Remove, - Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old }, - } + Setting::NotSet => { + Self::UpdateWithoutReindex { updated_settings: old, quantize: false } + } + }; + Ok(ret) } } @@ -239,6 +329,7 @@ fn apply_default_for_source( request: &mut Setting, response: &mut Setting, document_template: &mut Setting, + document_template_max_bytes: &mut Setting, headers: &mut Setting>, ) { match source { @@ -286,6 +377,7 @@ fn apply_default_for_source( *request = Setting::NotSet; *response = Setting::NotSet; *document_template = Setting::NotSet; + *document_template_max_bytes = Setting::NotSet; *headers = Setting::NotSet; } Setting::NotSet => {} @@ -316,6 +408,7 @@ impl EmbeddingSettings { pub const API_KEY: &'static str = "apiKey"; pub const DIMENSIONS: &'static str = "dimensions"; pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate"; + pub const DOCUMENT_TEMPLATE_MAX_BYTES: &'static str = "documentTemplateMaxBytes"; pub const URL: &'static str = "url"; pub const REQUEST: &'static str = "request"; @@ -458,7 +551,9 @@ impl std::fmt::Display for EmbedderSource { impl From for EmbeddingSettings { fn from(value: EmbeddingConfig) -> Self { - let EmbeddingConfig { embedder_options, prompt } = value; + let EmbeddingConfig { embedder_options, prompt, quantized } = value; + let document_template_max_bytes = + Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get()); match embedder_options { super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions { model, @@ -471,11 +566,13 @@ impl From for EmbeddingSettings { api_key: Setting::NotSet, dimensions: Setting::NotSet, document_template: Setting::Set(prompt.template), + document_template_max_bytes, url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { url, @@ -490,11 +587,13 @@ impl From for EmbeddingSettings { api_key: Setting::some_or_not_set(api_key), dimensions: Setting::some_or_not_set(dimensions), document_template: Setting::Set(prompt.template), + document_template_max_bytes, url: Setting::some_or_not_set(url), request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { embedding_model, @@ -509,11 +608,13 @@ impl From for EmbeddingSettings { api_key: Setting::some_or_not_set(api_key), dimensions: Setting::some_or_not_set(dimensions), document_template: Setting::Set(prompt.template), + document_template_max_bytes, url: Setting::some_or_not_set(url), request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { dimensions, @@ -525,11 +626,13 @@ impl From for EmbeddingSettings { api_key: Setting::NotSet, dimensions: Setting::Set(dimensions), document_template: Setting::NotSet, + document_template_max_bytes: Setting::NotSet, url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::Rest(super::rest::EmbedderOptions { api_key, @@ -546,11 +649,13 @@ impl From for EmbeddingSettings { api_key: Setting::some_or_not_set(api_key), dimensions: Setting::some_or_not_set(dimensions), document_template: Setting::Set(prompt.template), + document_template_max_bytes, url: Setting::Set(url), request: Setting::Set(request), response: Setting::Set(response), distribution: Setting::some_or_not_set(distribution), headers: Setting::Set(headers), + binary_quantized: Setting::some_or_not_set(quantized), }, } } @@ -566,13 +671,17 @@ impl From for EmbeddingConfig { api_key, dimensions, document_template, + document_template_max_bytes, url, request, response, distribution, headers, + binary_quantized, } = value; + this.quantized = binary_quantized.set(); + if let Some(source) = source.set() { match source { EmbedderSource::OpenAi => { @@ -648,7 +757,12 @@ impl From for EmbeddingConfig { } if let Setting::Set(template) = document_template { - this.prompt = PromptData { template } + let max_bytes = document_template_max_bytes + .set() + .and_then(NonZeroUsize::new) + .unwrap_or(default_max_bytes()); + + this.prompt = PromptData { template, max_bytes: Some(max_bytes) } } this