diff --git a/Cargo.lock b/Cargo.lock index 1af89d382..bcca35173 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -387,14 +387,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "arroy" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a" +source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d" dependencies = [ "bytemuck", "byteorder", "heed", "log", "memmap2", + "nohash", "ordered-float", "rand", "rayon", @@ -933,9 +933,9 @@ dependencies = [ [[package]] name = "charabia" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03cd8f290cae94934cdd0103c14c2de9faf2d7d85be0d24d511af2bf1b14119d" +checksum = "55ff52497324e7d168505a16949ae836c14595606fab94687238d2f6c8d4c798" dependencies = [ "aho-corasick", "csv", @@ -2838,7 +2838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" dependencies = [ "cfg-if", - "windows-targets 0.48.1", + "windows-targets 0.52.4", ] [[package]] @@ -3686,6 +3686,12 @@ version = "0.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" +[[package]] +name = "nohash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca" + [[package]] name = "nom" version = "7.1.3" diff --git a/README.md b/README.md index e60d09b13..59d618ab2 100644 --- a/README.md +++ b/README.md @@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co ## ✨ Features - **Hybrid search:** Combine the best of both 
[semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results - **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience -- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings +- **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need -- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results +- **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results 
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data - **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling -- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants +- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets - **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs - **Easy to install, deploy, and maintain** diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index 3b96cbfb0..4f66ed8b3 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -255,6 +255,8 @@ pub(crate) mod test { } "###); + insta::assert_json_snapshot!(vector_index.settings().unwrap()); + { let documents: Result> = 
vector_index.documents().unwrap().collect(); let mut documents = documents.unwrap(); diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap index 43bdb9726..77694a629 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap @@ -1,783 +1,56 @@ --- source: dump/src/reader/mod.rs -expression: document +expression: vector_index.settings().unwrap() --- { - "id": "e3", - "desc": "overriden vector + map", - "_vectors": { - "default": [ - 0.2, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 
0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, 
- 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 
0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 - ], - "toto": [ - 0.1 - ] - } + "displayedAttributes": [ + "*" + ], + "searchableAttributes": [ + "*" + ], + "filterableAttributes": [], + "sortableAttributes": [], + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "sort", + "exactness" + ], + "stopWords": [], + "nonSeparatorTokens": [], + "separatorTokens": [], + "dictionary": [], + "synonyms": {}, + "distinctAttribute": null, + "proximityPrecision": "byWord", + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + "faceting": { + "maxValuesPerFacet": 100, + "sortFacetValuesBy": { + "*": "alpha" + } + }, + "pagination": { + "maxTotalHits": 1000 + }, + "embedders": { + "default": { + "source": "huggingFace", + "model": "BAAI/bge-base-en-v1.5", + "revision": "617ca489d9e86b49b8167676d8220688b99db36e", + "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}" + } + }, + "searchCutoffMs": null } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap index a9c76227a..43bdb9726 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap @@ -3,784 +3,781 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e2", - "desc": "natural vector + map", + "id": "e3", + "desc": "overriden vector + map", "_vectors": { - "toto": [], - "default": { - "embeddings": [ - [ - -0.05189208313822746, - -0.9273212552070618, - 0.1443813145160675, - 0.0932632014155388, - 0.2665371894836426, - 0.36266782879829407, - 0.6402910947799683, - 0.32014018297195435, - 0.030915971845388412, - -0.9312191605567932, - -0.3718109726905823, - -0.2700554132461548, - 
-1.1014580726623535, - 0.9154956936836244, - -0.3406888246536255, - 1.0077725648880005, - 0.6577560901641846, - -0.3955195546150207, - -0.4148270785808563, - 0.1855088472366333, - 0.5062315464019775, - -0.3632686734199524, - -0.2277890294790268, - 0.2560805082321167, - -0.3853609561920166, - -0.1604762226343155, - -0.13947471976280212, - -0.20147813856601715, - -0.4466346800327301, - -0.3761846721172333, - 0.1443382054567337, - 0.18205296993255615, - 0.49359792470932007, - -0.22538000345230105, - -0.4996317625045776, - -0.22734887897968292, - -0.6034309267997742, - -0.7857939600944519, - -0.34923747181892395, - -0.3466345965862274, - 0.21176661550998688, - -0.5101462006568909, - -0.3403083384037018, - 0.000315118464641273, - 0.236465722322464, - -0.10246097296476364, - -1.3013339042663574, - 0.3419138789176941, - -0.32963496446609497, - -0.0901619717478752, - -0.5426247119903564, - 0.22656650841236117, - -0.44758284091949463, - 0.14151698350906372, - -0.1089438870549202, - 0.5500766634941101, - -0.670711100101471, - -0.6227269768714905, - 0.3894464075565338, - -0.27609574794769287, - 0.7028202414512634, - -0.19697771966457367, - 0.328511506319046, - 0.5063360929489136, - 0.4065195322036743, - 0.2614171802997589, - -0.30274391174316406, - 1.0393824577331543, - -0.7742937207221985, - -0.7874112129211426, - -0.6749666929244995, - 0.5190866589546204, - 0.004123548045754433, - -0.28312963247299194, - -0.038731709122657776, - -1.0142987966537476, - -0.09519586712121964, - 0.8755272626876831, - 0.4876938760280609, - 0.7811151742935181, - 0.85174959897995, - 0.11826585978269576, - 0.5373436808586121, - 0.3649002015590668, - 0.19064077734947205, - -0.00287026260048151, - -0.7305403351783752, - -0.015206154435873032, - -0.7899249196052551, - 0.19407285749912265, - 0.08596625179052353, - -0.28976231813430786, - -0.1525907665491104, - 0.3798313438892365, - 0.050306469202041626, - -0.5697937607765198, - 0.4219021201133728, - 0.276252806186676, - 0.1559903472661972, - 
0.10030482709407806, - -0.4043720066547394, - -0.1969818025827408, - 0.5739826560020447, - 0.2116064727306366, - -1.4620544910430908, - -0.7802462577819824, - -0.24739810824394223, - -0.09791352599859238, - -0.4413802027702331, - 0.21549351513385773, - -0.9520436525344848, - -0.08762510865926743, - 0.08154498040676117, - -0.6154940724372864, - -1.01079523563385, - 0.885427713394165, - 0.6967288851737976, - 0.27186504006385803, - -0.43194177746772766, - -0.11248451471328735, - 0.7576630711555481, - 0.4998855590820313, - 0.0264343973249197, - 0.9872855544090272, - 0.5634694695472717, - 0.053698331117630005, - 0.19410227239131927, - 0.3570743501186371, - -0.23670297861099243, - -0.9114483594894408, - 0.07884842902421951, - 0.7318344116210938, - 0.44630110263824463, - 0.08745364099740982, - -0.347101628780365, - -0.4314247667789459, - -0.5060274004936218, - 0.003706763498485088, - 0.44320008158683777, - -0.00788921769708395, - -0.1368623524904251, - -0.17391923069953918, - 0.14473655819892883, - 0.10927865654230118, - 0.6974599361419678, - 0.005052129738032818, - -0.016953065991401672, - -0.1256176233291626, - -0.036742497235536575, - 0.5591985583305359, - -0.37619709968566895, - 0.22429119050502777, - 0.5403043031692505, - -0.8603790998458862, - -0.3456307053565979, - 0.9292937517166138, - 0.5074859261512756, - 0.6310645937919617, - -0.3091641068458557, - 0.46902573108673096, - 0.7891915440559387, - 0.4499550759792328, - 0.2744995653629303, - 0.2712305784225464, - -0.04349074140191078, - -0.3638863265514374, - 0.7839881777763367, - 0.7352104783058167, - -0.19457511603832245, - -0.5957832932472229, - -0.43704694509506226, - -1.084769368171692, - 0.4904985725879669, - 0.5385226011276245, - 0.1891629993915558, - 0.12338479608297348, - 0.8315675258636475, - -0.07830192148685455, - 1.0916285514831543, - -0.28066861629486084, - -1.3585069179534912, - 0.5203898549079895, - 0.08678033947944641, - -0.2566044330596924, - 0.09484415501356123, - -0.0180208683013916, - 
1.0264745950698853, - -0.023572135716676712, - 0.5864979028701782, - 0.7625196576118469, - -0.2543414533138275, - -0.8877770900726318, - 0.7611982822418213, - -0.06220436468720436, - 0.937336564064026, - 0.2704363465309143, - -0.37733694911003113, - 0.5076137781143188, - -0.30641937255859375, - 0.6252772808074951, - -0.0823579877614975, - -0.03736555948853493, - 0.4131673276424408, - -0.6514252424240112, - 0.12918265163898468, - -0.4483584463596344, - 0.6750786304473877, - -0.37008383870124817, - -0.02324833907186985, - 0.38027650117874146, - -0.26374951004981995, - 0.4346931278705597, - 0.42882832884788513, - -0.48798441886901855, - 1.1882442235946655, - 0.5132288336753845, - 0.5284568667411804, - -0.03538886830210686, - 0.29620853066444397, - -1.0683696269989014, - 0.25936177372932434, - 0.10404160618782043, - -0.25796034932136536, - 0.027896970510482788, - -0.09225251525640488, - 1.4811025857925415, - 0.641173779964447, - -0.13838383555412292, - -0.3437179923057556, - 0.5667019486427307, - -0.5400741696357727, - 0.31090837717056274, - 0.6470608115196228, - -0.3747067153453827, - -0.7364534735679626, - -0.07431528717279434, - 0.5173454880714417, - -0.6578747034072876, - 0.7107478976249695, - -0.7918999791145325, - -0.0648345872759819, - 0.609937846660614, - -0.7329513430595398, - 0.9741371870040894, - 0.17912346124649048, - -0.02658769302070141, - 0.5162150859832764, - -0.3978803157806397, - -0.7833885550498962, - -0.6497276425361633, - -0.3898126780986786, - -0.0952848568558693, - 0.2663288116455078, - -0.1604052186012268, - 0.373076468706131, - -0.8357769250869751, - -0.05217683315277099, - -0.2680160701274872, - 0.8389158248901367, - 0.6833611130714417, - -0.6712407469749451, - 0.7406917214393616, - -0.44522786140441895, - -0.34645363688468933, - -0.27384576201438904, - -0.9878405928611756, - -0.8166060447692871, - 0.06268279999494553, - 0.38567957282066345, - -0.3274703919887543, - 0.5296315550804138, - -0.11810623109340668, - 0.23029841482639313, - 
0.08616159111261368, - -0.2195747196674347, - 0.09430307894945145, - 0.4057176411151886, - 0.4892159104347229, - -0.1636916548013687, - -0.6071445345878601, - 0.41256585717201233, - 0.622254490852356, - -0.41223976016044617, - -0.6686707139015198, - -0.7474371790885925, - -0.8509522080421448, - -0.16754287481307983, - -0.9078601002693176, - -0.29653599858283997, - -0.5020652413368225, - 0.4692700505256653, - 0.01281109917908907, - -0.16071580350399017, - 0.03388889133930206, - -0.020511148497462273, - 0.5027827024459839, - -0.20729811489582065, - 0.48107290267944336, - 0.33669769763946533, - -0.5275911688804626, - 0.48271527886390686, - 0.2738940715789795, - -0.033152539283037186, - -0.13629786670207977, - -0.05965912342071533, - -0.26200807094573975, - 0.04002794995903969, - -0.34095603227615356, - -3.986898899078369, - -0.46819332242012024, - -0.422744482755661, - -0.169097900390625, - 0.6008929014205933, - 0.058016058057546616, - -0.11401277780532836, - -0.3077819049358368, - -0.09595538675785063, - 0.6723822355270386, - 0.19367831945419312, - 0.28304359316825867, - 0.1609862744808197, - 0.7567598819732666, - 0.6889985799789429, - 0.06907720118761063, - -0.04188092052936554, - -0.7434936165809631, - 0.13321782648563385, - 0.8456063270568848, - -0.10364038497209548, - -0.45084846019744873, - -0.4758241474628449, - 0.43882066011428833, - -0.6432598829269409, - 0.7217311859130859, - -0.24189773201942444, - 0.12737572193145752, - -1.1008601188659668, - -0.3305315673351288, - 0.14614742994308472, - -0.7819333076477051, - 0.5287120342254639, - -0.055538054555654526, - 0.1877404749393463, - -0.6907662153244019, - 0.5616975426673889, - -0.4611121714115143, - -0.26109233498573303, - -0.12898315489292145, - -0.3724522292613983, - -0.7191406488418579, - -0.4425233602523804, - -0.644108235836029, - 0.8424481153488159, - 0.17532426118850708, - -0.5121750235557556, - -0.6467239260673523, - -0.0008507720194756985, - 0.7866212129592896, - -0.02644744887948036, - 
-0.005045140627771616, - 0.015782782807946205, - 0.16334445774555206, - -0.1913367658853531, - -0.13697923719882965, - -0.6684983372688293, - 0.18346354365348816, - -0.341105580329895, - 0.5427411198616028, - 0.3779832422733307, - -0.6778115034103394, - -0.2931850254535675, - -0.8805161714553833, - -0.4212774932384491, - -0.5368952751159668, - -1.3937891721725464, - -1.225494146347046, - 0.4276703894138336, - 1.1205668449401855, - -0.6005299687385559, - 0.15732505917549133, - -0.3914784789085388, - -1.357046604156494, - -0.4707142114639282, - -0.1497287154197693, - -0.25035548210144043, - -0.34328439831733704, - 0.39083412289619446, - 0.1623048633337021, - -0.9275814294815063, - -0.6430015563964844, - 0.2973862886428833, - 0.5580436587333679, - -0.6232585310935974, - -0.6611042022705078, - 0.4015969038009643, - -1.0232892036437988, - -0.2585645020008087, - -0.5431421399116516, - 0.5021264553070068, - -0.48601630330085754, - -0.010242084041237833, - 0.5862035155296326, - 0.7316920161247253, - 0.4036808013916016, - 0.4269520044326782, - -0.705938458442688, - 0.7747307419776917, - 0.10164368897676468, - 0.7887958884239197, - -0.9612497091293336, - 0.12755516171455383, - 0.06812842190265656, - -0.022603651508688927, - 0.14722754061222076, - -0.5588505268096924, - -0.20689940452575684, - 0.3557641804218292, - -0.6812759637832642, - 0.2860803008079529, - -0.38954633474349976, - 0.1759403496980667, - -0.5678874850273132, - -0.1692986786365509, - -0.14578519761562347, - 0.5711379051208496, - 1.0208125114440918, - 0.7759483456611633, - -0.372348427772522, - -0.5460885763168335, - 0.7190321683883667, - -0.6914990544319153, - 0.13365162909030914, - -0.4854792356491089, - 0.4054908752441406, - 0.4502798914909363, - -0.3041122555732727, - -0.06726965308189392, - -0.05570871382951737, - -0.0455719493329525, - 0.4785125255584717, - 0.8867972493171692, - 0.4107886850833893, - 0.6121342182159424, - -0.20477132499217987, - -0.5598517656326294, - -0.6443566679954529, - 
-0.5905212759971619, - -0.5571200251579285, - 0.17573799192905426, - -0.28621870279312134, - 0.1685224026441574, - 0.09719007462263109, - -0.04223639518022537, - -0.28623101115226746, - -0.1449810117483139, - -0.3789580464363098, - -0.5227636098861694, - -0.049728814512491226, - 0.7849089503288269, - 0.16792525351047516, - 0.9849340915679932, - -0.6559549570083618, - 0.35723909735679626, - -0.6822739243507385, - 1.2873116731643677, - 0.19993330538272855, - 0.03512010723352432, - -0.6972134113311768, - 0.18453484773635864, - -0.2437680810689926, - 0.2156416028738022, - 0.5230382680892944, - 0.22020135819911957, - 0.8314080238342285, - 0.15627102553844452, - -0.7330264449119568, - 0.3888184726238251, - -0.22034703195095065, - 0.5457669496536255, - -0.48084837198257446, - -0.45576658844947815, - -0.09287727624177931, - -0.06968110054731369, - 0.35125672817230225, - -0.4278119504451752, - 0.2038476765155792, - 0.11392722278833388, - 0.9433983564376832, - -0.4097744226455689, - 0.035297419875860214, - -0.4274404048919678, - -0.25100165605545044, - 1.0943366289138794, - -0.07634022831916809, - -0.2925529479980469, - -0.7512530088424683, - 0.2649727463722229, - -0.4078235328197479, - -0.3372223973274231, - 0.05190162733197212, - 0.005654910113662481, - -0.0001571219472680241, - -0.35445958375930786, - -0.7837416529655457, - 0.1500556766986847, - 0.4383024573326111, - 0.6099548935890198, - 0.05951934307813645, - -0.21325334906578064, - 0.0199207104742527, - -0.22704418003559113, - -0.6481077671051025, - 0.37442275881767273, - -1.015955924987793, - 0.38637226819992065, - -0.06489371508359909, - -0.494120329618454, - 0.3469836115837097, - 0.15402406454086304, - -0.7660972476005554, - -0.7053225040435791, - -0.25964751839637756, - 0.014004424214363098, - -0.2860170006752014, - -0.17565494775772095, - -0.45117494463920593, - -0.0031954257283359766, - 0.09676837921142578, - -0.514464259147644, - 0.41698193550109863, - -0.21642713248729703, - -0.5398141145706177, - 
-0.3647628426551819, - 0.37005379796028137, - 0.239425927400589, - -0.08833975344896317, - 0.934946596622467, - -0.48340797424316406, - 0.6241437792778015, - -0.7253676652908325, - -0.04303571209311485, - 1.1125205755233765, - -0.15692919492721558, - -0.2914651036262512, - -0.5117168426513672, - 0.21365483105182648, - 0.4924402534961701, - 0.5269662141799927, - 0.0352792888879776, - -0.149167999625206, - -0.6019760370254517, - 0.08245442807674408, - 0.4900692105293274, - 0.518824577331543, - -0.00005570516441366635, - -0.553304135799408, - 0.22217543423175812, - 0.5047767758369446, - 0.135724738240242, - 1.1511540412902832, - -0.3541218340396881, - -0.9712511897087096, - 0.8353699445724487, - -0.39227569103240967, - -0.9117669463157654, - -0.26349931955337524, - 0.05597023293375969, - 0.20695461332798004, - 0.3178807199001312, - 1.0663238763809204, - 0.5062212347984314, - 0.7288597822189331, - 0.09899299591779707, - 0.553720235824585, - 0.675009548664093, - -0.20067055523395536, - 0.3138423264026642, - -0.6886593103408813, - -0.2910398542881012, - -1.3186300992965698, - -0.4684459865093231, - -0.095743365585804, - -0.1257995069026947, - -0.4858281314373016, - -0.4935407340526581, - -0.3266896903514862, - -0.3928797245025635, - -0.40803104639053345, - -0.9975396394729614, - 0.4229583740234375, - 0.37309643626213074, - 0.4431034922599793, - 0.30364808440208435, - -0.3765178918838501, - 0.5616499185562134, - 0.16904796659946442, - -0.7343707084655762, - 0.2560209631919861, - 0.6166825294494629, - 0.3200829327106476, - -0.4483652710914612, - 0.16224201023578644, - -0.31495288014411926, - -0.42713335156440735, - 0.7270734906196594, - 0.7049484848976135, - -0.0571461021900177, - 0.04477125033736229, - -0.6647796034812927, - 1.183672308921814, - 0.36199676990509033, - 0.046881116926670074, - 0.4515796303749085, - 0.9278061985969543, - 0.31471705436706543, - -0.7073333859443665, - -0.3443860113620758, - 0.5440067052841187, - -0.15020819008350372, - -0.541202962398529, - 
0.5203295946121216, - 1.2192286252975464, - -0.9983593225479126, - -0.18758884072303772, - 0.2758221924304962, - -0.6511523723602295, - -0.1584404855966568, - -0.236241415143013, - 0.2692437767982483, - -0.4941152036190033, - 0.4987454116344452, - -0.3331359028816223, - 0.3163745701313019, - 0.745529294013977, - -0.2905873656272888, - 0.13602906465530396, - 0.4679684340953827, - 1.0555986166000366, - 1.075700044631958, - 0.5368486046791077, - -0.5118206739425659, - 0.8668332099914551, - -0.5726966857910156, - -0.7811751961708069, - 0.1938626915216446, - -0.1929349899291992, - 0.1757766306400299, - 0.6384295225143433, - 0.26462844014167786, - 0.9542630314826964, - 0.19313029944896695, - 1.264248013496399, - -0.6304428577423096, - 0.0487106591463089, - -0.16211535036563873, - -0.7894763350486755, - 0.3582514822483063, - -0.04153040423989296, - 0.635784387588501, - 0.6554391980171204, - -0.47010496258735657, - -0.8302040696144104, - -0.1350124627351761, - 0.2568812072277069, - 0.13614831864833832, - -0.2563649117946625, - -1.0434694290161133, - 0.3232482671737671, - 0.47882452607154846, - 0.4298652410507202, - 1.0563770532608032, - -0.28917592763900757, - -0.8533256649971008, - 0.10648339986801147, - 0.6376127004623413, - -0.20832888782024384, - 0.2370245456695557, - 0.0018312990432605147, - -0.2034837007522583, - 0.01051164511591196, - -1.105310082435608, - 0.29724350571632385, - 0.15604574978351593, - 0.1973688006401062, - 0.44394731521606445, - 0.3974513411521912, - -0.13625948131084442, - 0.9571986198425292, - 0.2257384955883026, - 0.2323588728904724, - -0.5583669543266296, - -0.7854922413825989, - 0.1647188365459442, - -1.6098142862319946, - 0.318587988615036, - -0.13399995863437653, - -0.2172701060771942, - -0.767514705657959, - -0.5813586711883545, - -0.3195130527019501, - -0.04894036799669266, - 0.2929930090904236, - -0.8213384747505188, - 0.07181350141763687, - 0.7469993829727173, - 0.6407455801963806, - 0.16365697979927063, - 0.7870153188705444, - 
0.6524736881256104, - 0.6399973630905151, - -0.04992736503481865, - -0.03959266096353531, - -0.2512352466583252, - 0.8448855876922607, - -0.1422702670097351, - 0.1216789186000824, - -1.2647287845611572, - 0.5931149125099182, - 0.7186052203178406, - -0.06118432432413101, - -1.1942816972732544, - -0.17677085101604462, - 0.31543800234794617, - -0.32252824306488037, - 0.8255583047866821, - -0.14529970288276672, - -0.2695446312427521, - -0.33378756046295166, - -0.1653425395488739, - 0.1454019844532013, - -0.3920115828514099, - 0.912214994430542, - -0.7279734015464783, - 0.7374742031097412, - 0.933980405330658, - 0.13429680466651917, - -0.514870285987854, - 0.3989711999893189, - -0.11613689363002776, - 0.4022413492202759, - -0.9990655779838562, - -0.33749932050704956, - -0.4334589838981629, - -1.376373291015625, - -0.2993924915790558, - -0.09454808384180068, - -0.01314175222069025, - -0.001090060803107917, - 0.2137461006641388, - 0.2938512861728668, - 0.17508235573768616, - 0.8260607123374939, - -0.7218498587608337, - 0.2414487451314926, - -0.47296759486198425, - -0.3002610504627228, - -1.238540768623352, - 0.08663805574178696, - 0.6805586218833923, - 0.5909030437469482, - -0.42807504534721375, - -0.22887496650218964, - 0.47537800669670105, - -1.0474627017974854, - 0.6338009238243103, - 0.06548397243022919, - 0.4971011281013489, - 1.3484878540039063 - ] - ], - "regenerate": true - } + "default": [ + 0.2, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, 
+ 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ], + "toto": [ + 0.1 + ] } } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap index e5d28e450..a9c76227a 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap @@ -3,780 +3,781 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e1", - "desc": "natural vector", + "id": "e2", + "desc": "natural vector + map", "_vectors": { + "toto": [], "default": { "embeddings": [ [ - -0.2979458272457123, - -0.5288640856742859, - -0.019957859069108963, - -0.18495318293571472, - 0.7429973483085632, - 0.5238497257232666, - 0.432366281747818, - 0.32744166254997253, - 0.0020762972999364138, - -0.9507834911346436, - -0.35097137093544006, - 0.08469701558351517, - -1.4176613092422483, - 0.4647577106952667, - -0.69340580701828, - 1.0372896194458008, - 0.3716741800308227, - 0.06031008064746857, - -0.6152024269104004, - 0.007914665155112743, - 0.7954924702644348, - -0.20773003995418549, - 0.09376765787601472, - 0.04508133605122566, - 
-0.2084471583366394, - -0.1518009901046753, - 0.018195509910583496, - -0.07044368237257004, - -0.18119366466999057, - -0.4480230510234833, - 0.3822529911994934, - 0.1911812424659729, - 0.4674372375011444, - 0.06963984668254852, - -0.09341949224472046, - 0.005675444379448891, - -0.6774799227714539, - -0.7066726684570313, - -0.39256376028060913, - 0.04005039855837822, - 0.2084812968969345, - -0.7872875928878784, - -0.8205880522727966, - 0.2919981777667999, - -0.06004738807678223, - -0.4907574355602264, - -1.5937862396240234, - 0.24249385297298431, - -0.14709846675395966, - -0.11860740929841997, - -0.8299489617347717, - 0.472964346408844, - -0.497518390417099, - -0.22205302119255063, - -0.4196169078350067, - 0.32697558403015137, - -0.360930860042572, - -0.9789686799049376, - 0.1887447088956833, - -0.403737336397171, - 0.18524253368377688, - 0.3768732249736786, - 0.3666233420372009, - 0.3511938452720642, - 0.6985810995101929, - 0.41721710562705994, - 0.09754953533411026, - 0.6204307079315186, - -1.0762996673583984, - -0.06263761967420578, - -0.7376511693000793, - 0.6849768161773682, - -0.1745152473449707, - -0.40449759364128113, - 0.20757411420345304, - -0.8424443006515503, - 0.330015629529953, - 0.3489064872264862, - 1.0954371690750122, - 0.8487558960914612, - 1.1076823472976685, - 0.61430823802948, - 0.4155903458595276, - 0.4111340939998626, - 0.05753209814429283, - -0.06429877132177353, - -0.765606164932251, - -0.41703930497169495, - -0.508820652961731, - 0.19859947264194489, - -0.16607828438282013, - -0.28112146258354187, - 0.11032675206661224, - 0.38809511065483093, - -0.36498191952705383, - -0.48671194911003113, - 0.6755134463310242, - 0.03958442434668541, - 0.4478721618652344, - -0.10335399955511092, - -0.9546685814857484, - -0.6087718605995178, - 0.17498846352100372, - 0.08320838958024979, - -1.4478336572647097, - -0.605027437210083, - -0.5867993235588074, - -0.14711688458919525, - -0.5447602272033691, - -0.026259321719408035, - -0.6997418403625488, - 
-0.07349082082509995, - 0.10638900846242905, - -0.7133527398109436, - -0.9396815299987792, - 1.087092399597168, - 1.1885089874267578, - 0.4011896848678589, - -0.4089202582836151, - -0.10938972979784012, - 0.6726722121238708, - 0.24576938152313232, - -0.24247920513153076, - 1.1499971151351929, - 0.47813335061073303, - -0.05331678315997124, - 0.32338133454322815, - 0.4870913326740265, - -0.23144258558750153, - -1.2023426294326782, - 0.2349330335855484, - 1.080536961555481, - 0.29334118962287903, - 0.391574501991272, - -0.15818795561790466, - -0.2948290705680847, - -0.024689948186278343, - 0.06602869182825089, - 0.5937030911445618, - -0.047901444137096405, - -0.512734591960907, - -0.35780075192451477, - 0.28751692175865173, - 0.4298716187477112, - 0.9242428541183472, - -0.17208744585514069, - 0.11515070497989656, - -0.0335976779460907, - -0.3422986567020416, - 0.5344581604003906, - 0.19895796477794647, - 0.33001241087913513, - 0.6390730142593384, - -0.6074934005737305, - -0.2553696632385254, - 0.9644920229911804, - 0.2699219584465027, - 0.6403993368148804, - -0.6380003690719604, - -0.027310986071825027, - 0.638815701007843, - 0.27719101309776306, - -0.13553589582443237, - 0.750195324420929, - 0.1224869191646576, - -0.20613941550254825, - 0.8444448709487915, - 0.16200250387191772, - -0.24750925600528717, - -0.739950954914093, - -0.28443849086761475, - -1.176282525062561, - 0.516107976436615, - 0.3774825632572174, - 0.10906043648719788, - 0.07962015271186829, - 0.7384604215621948, - -0.051241904497146606, - 1.1730090379714966, - -0.4828610122203827, - -1.404372215270996, - 0.8811132311820984, - -0.3839482367038727, - 0.022516896948218346, - -0.0491158664226532, - -0.43027013540267944, - 1.2049334049224854, - -0.27309560775756836, - 0.6883630752563477, - 0.8264574408531189, - -0.5020735263824463, - -0.4874092042446137, - 0.6007202863693237, - -0.4965405762195587, - 1.1302915811538696, - 0.032572727650403976, - -0.3731859028339386, - 0.658271849155426, - 
-0.9023059010505676, - 0.7400162220001221, - 0.014550759457051754, - -0.19699542224407196, - 0.2319706380367279, - -0.789058268070221, - -0.14905710518360138, - -0.5826214551925659, - 0.207652747631073, - -0.4507439732551574, - -0.3163885474205017, - 0.3604124188423157, - -0.45119962096214294, - 0.3428427278995514, - 0.3005594313144684, - -0.36026081442832947, - 1.1014249324798584, - 0.40884315967559814, - 0.34991952776908875, - -0.1806638240814209, - 0.27440476417541504, - -0.7118373513221741, - 0.4645499587059021, - 0.214790478348732, - -0.2343102991580963, - 0.10500429570674896, - -0.28034430742263794, - 1.2267805337905884, - 1.0561333894729614, - -0.497364342212677, - -0.6143305897712708, - 0.24963727593421936, - -0.33136463165283203, - -0.01473914459347725, - 0.495918869972229, - -0.6985538005828857, - -1.0033197402954102, - 0.35937801003456116, - 0.6325868368148804, - -0.6808838844299316, - 1.0354058742523191, - -0.7214401960372925, - -0.33318862318992615, - 0.874398410320282, - -0.6594992280006409, - 0.6830640435218811, - -0.18534131348133087, - 0.024834271520376205, - 0.19901277124881744, - -0.5992477536201477, - -1.2126628160476685, - -0.9245557188987732, - -0.3898217976093292, - -0.1286519467830658, - 0.4217943847179413, - -0.1143646091222763, - 0.5630772709846497, - -0.5240639448165894, - 0.21152715384960177, - -0.3792001008987427, - 0.8266305327415466, - 1.170984387397766, - -0.8072142004966736, - 0.11382893472909927, - -0.17953898012638092, - -0.1789460331201553, - -0.15078622102737427, - -1.2082908153533936, - -0.7812382578849792, - -0.10903695970773696, - 0.7303897142410278, - -0.39054441452026367, - 0.19511254131793976, - -0.09121843427419662, - 0.22400228679180145, - 0.30143046379089355, - 0.1141919493675232, - 0.48112115263938904, - 0.7307931780815125, - 0.09701362252235413, - -0.2795647978782654, - -0.3997688889503479, - 0.5540812611579895, - 0.564578115940094, - -0.40065160393714905, - -0.3629159033298493, - -0.3789091110229492, - 
-0.7298538088798523, - -0.6996853351593018, - -0.4477842152118683, - -0.289089560508728, - -0.6430277824401855, - 0.2344944179058075, - 0.3742927014827728, - -0.5079357028007507, - 0.28841453790664673, - 0.06515737622976303, - 0.707315981388092, - 0.09498685598373412, - 0.8365515470504761, - 0.10002726316452026, - -0.7695478200912476, - 0.6264724135398865, - 0.7562043070793152, - -0.23112858831882477, - -0.2871039807796478, - -0.25010058283805847, - 0.2783474028110504, - -0.03224996477365494, - -0.9119359850883484, - -3.6940200328826904, - -0.5099936127662659, - -0.1604711413383484, - 0.17453284561634064, - 0.41759559512138367, - 0.1419190913438797, - -0.11362407356500626, - -0.33312007784843445, - 0.11511333286762238, - 0.4667884409427643, - -0.0031647447030991316, - 0.15879854559898376, - 0.3042248487472534, - 0.5404849052429199, - 0.8515422344207764, - 0.06286454200744629, - 0.43790125846862793, - -0.8682025074958801, - -0.06363756954669952, - 0.5547921657562256, - -0.01483887154608965, - -0.07361344993114471, - -0.929947018623352, - 0.3502565622329712, - -0.5080993175506592, - 1.0380364656448364, - -0.2017953395843506, - 0.21319580078125, - -1.0763001441955566, - -0.556368887424469, - 0.1949922740459442, - -0.6445739269256592, - 0.6791343688964844, - 0.21188358962535855, - 0.3736183941364288, - -0.21800459921360016, - 0.7597446441650391, - -0.3732394874095917, - -0.4710160195827484, - 0.025146087631583217, - 0.05341297015547752, - -0.9522109627723694, - -0.6000866889953613, - -0.08469046652317047, - 0.5966026186943054, - 0.3444081246852875, - -0.461188405752182, - -0.5279349088668823, - 0.10296865552663804, - 0.5175143480300903, - -0.20671147108078003, - 0.13392412662506104, - 0.4812754988670349, - 0.2993808686733246, - -0.3005635440349579, - 0.5141698122024536, - -0.6239235401153564, - 0.2877119481563568, - -0.4452739953994751, - 0.5621107816696167, - 0.5047508478164673, - -0.4226335883140564, - -0.18578553199768064, - -1.1967322826385498, - 
0.28178197145462036, - -0.8692031502723694, - -1.1812998056411743, - -1.4526212215423584, - 0.4645712077617645, - 0.9327932000160216, - -0.6560136675834656, - 0.461549699306488, - -0.5621527433395386, - -1.328449010848999, - -0.08676894754171371, - 0.00021918353741057217, - -0.18864136934280396, - 0.1259666532278061, - 0.18240638077259064, - -0.14919660985469818, - -0.8965857625007629, - -0.7539900541305542, - 0.013973715715110302, - 0.504276692867279, - -0.704748272895813, - -0.6428424119949341, - 0.6303996443748474, - -0.5404738187789917, - -0.31176653504371643, - -0.21262824535369873, - 0.18736739456653595, - -0.7998970746994019, - 0.039946746081113815, - 0.7390344738960266, - 0.4283199906349182, - 0.3795057237148285, - 0.07204607129096985, - -0.9230587482452391, - 0.9440426230430604, - 0.26272690296173096, - 0.5598306655883789, - -1.0520871877670288, - -0.2677186131477356, - -0.1888762265443802, - 0.30426350235939026, - 0.4746131896972656, - -0.5746733546257019, - -0.4197768568992615, - 0.8565112948417664, - -0.6767723560333252, - 0.23448683321475983, - -0.2010004222393036, - 0.4112907350063324, - -0.6497949957847595, - -0.418667733669281, - -0.4950824975967407, - 0.44438859820365906, - 1.026281714439392, - 0.482397586107254, - -0.26220494508743286, - -0.3640787005424499, - 0.5907743573188782, - -0.8771642446517944, - 0.09708411991596222, - -0.3671700060367584, - 0.4331349730491638, - 0.619417667388916, - -0.2684665620326996, - -0.5123821496963501, - -0.1502324342727661, - -0.012190685607492924, - 0.3580845892429352, - 0.8617186546325684, - 0.3493645489215851, - 1.0270192623138428, - 0.18297909200191495, - -0.5881339311599731, - -0.1733516901731491, - -0.5040576457977295, - -0.340370237827301, - -0.26767754554748535, - -0.28570041060447693, - -0.032928116619586945, - 0.6029254794120789, - 0.17397655546665192, - 0.09346921741962431, - 0.27815181016921997, - -0.46699589490890503, - -0.8148876428604126, - -0.3964351713657379, - 0.3812595009803772, - 
0.13547226786613464, - 0.7126688361167908, - -0.3473474085330963, - -0.06573959439992905, - -0.6483767032623291, - 1.4808889627456665, - 0.30924928188323975, - -0.5085946917533875, - -0.8613000512123108, - 0.3048902451992035, - -0.4241599142551422, - 0.15909206867218018, - 0.5764641761779785, - -0.07879110425710678, - 1.015336513519287, - 0.07599356025457382, - -0.7025855779647827, - 0.30047643184661865, - -0.35094937682151794, - 0.2522146999835968, - -0.2338722199201584, - -0.8326804637908936, - -0.13695412874221802, - -0.03452421352267265, - 0.47974953055381775, - -0.18385636806488037, - 0.32438594102859497, - 0.1797013282775879, - 0.787494957447052, - -0.12579888105392456, - -0.07507286965847015, - -0.4389670491218567, - 0.2720070779323578, - 0.8138866424560547, - 0.01974171027541161, - -0.3057698905467987, - -0.6709924936294556, - 0.0885881632566452, - -0.2862754464149475, - 0.03475658595561981, - -0.1285519152879715, - 0.3838353455066681, - -0.2944154739379883, - -0.4204859137535095, - -0.4416137933731079, - 0.13426260650157928, - 0.36733248829841614, - 0.573428750038147, - -0.14928072690963745, - -0.026076916605234143, - 0.33286052942276, - -0.5340145826339722, - -0.17279052734375, - -0.01154550164937973, - -0.6620771884918213, - 0.18390542268753052, - -0.08265615254640579, - -0.2489682286977768, - 0.2429984211921692, - -0.044153645634651184, - -0.986578404903412, - -0.33574509620666504, - -0.5387663841247559, - 0.19767941534519196, - 0.12540718913078308, - -0.3403128981590271, - -0.4154576361179352, - 0.17275673151016235, - 0.09407442808151244, - -0.5414086580276489, - 0.4393929839134216, - 0.1725579798221588, - -0.4998118281364441, - -0.6926208138465881, - 0.16552448272705078, - 0.6659538149833679, - -0.10949844866991044, - 0.986426830291748, - 0.01748848147690296, - 0.4003709554672241, - -0.5430638194084167, - 0.35347291827201843, - 0.6887399554252625, - 0.08274628221988678, - 0.13407137989997864, - -0.591465950012207, - 0.3446292281150818, - 
0.6069018244743347, - 0.1935492902994156, - -0.0989871397614479, - 0.07008486241102219, - -0.8503749370574951, - -0.09507356584072112, - 0.6259510517120361, - 0.13934025168418884, - 0.06392545253038406, - -0.4112265408039093, - -0.08475656062364578, - 0.4974113404750824, - -0.30606114864349365, - 1.111435890197754, - -0.018766529858112335, - -0.8422622680664063, - 0.4325508773326874, - -0.2832120656967163, - -0.4859798848628998, - -0.41498348116874695, - 0.015977520495653152, - 0.5292825698852539, - 0.4538311660289765, - 1.1328668594360352, - 0.22632671892642975, - 0.7918671369552612, - 0.33401933312416077, - 0.7306135296821594, - 0.3548600673675537, - 0.12506209313869476, - 0.8573207855224609, - -0.5818327069282532, - -0.6953738927841187, - -1.6171947717666626, - -0.1699674427509308, - 0.6318262815475464, - -0.05671752244234085, - -0.28145185112953186, - -0.3976689279079437, - -0.2041076272726059, - -0.5495951175689697, - -0.5152917504310608, - -0.9309796094894408, - 0.101932130753994, - 0.1367802917957306, - 0.1490798443555832, - 0.5304336547851563, - -0.5082434415817261, - 0.06688683480024338, - 0.14657628536224365, - -0.782435953617096, - 0.2962816655635834, - 0.6965363621711731, - 0.8496337532997131, - -0.3042965829372406, - 0.04343798756599426, - 0.0330701619386673, - -0.5662598013877869, - 1.1086925268173218, - 0.756072998046875, - -0.204134538769722, - 0.2404300570487976, - -0.47848284244537354, - 1.3659011125564575, - 0.5645433068275452, - -0.15836156904697418, - 0.43395575881004333, - 0.5944653749465942, - 1.0043466091156006, - -0.49446743726730347, - -0.5954391360282898, - 0.5341240763664246, - 0.020598189905285835, - -0.4036853015422821, - 0.4473709762096405, - 1.1998231410980225, - -0.9317775368690492, - -0.23321466147899628, - 0.2052552700042725, - -0.7423108816146851, - -0.19917210936546328, - -0.1722569614648819, - -0.034072667360305786, - -0.00671181408688426, - 0.46396249532699585, - -0.1372445821762085, - 0.053376372903585434, - 
0.7392690777778625, - -0.38447609543800354, - 0.07497968524694443, - 0.5197252631187439, - 1.3746477365493774, - 0.9060075879096984, - 0.20000585913658145, - -0.4053704142570496, - 0.7497360110282898, - -0.34087055921554565, - -1.101803183555603, - 0.273650586605072, - -0.5125769376754761, - 0.22472351789474487, - 0.480757474899292, - -0.19845178723335263, - 0.8857700824737549, - 0.30752456188201904, - 1.1109285354614258, - -0.6768012642860413, - 0.524367094039917, - -0.22495046257972717, - -0.4224412739276886, - 0.40753406286239624, - -0.23133376240730288, - 0.3297771215438843, - 0.4905449151992798, - -0.6813114285469055, - -0.7543983459472656, - -0.5599071383476257, - 0.14351597428321838, - -0.029278717935085297, - -0.3970443606376648, - -0.303079217672348, - 0.24161772429943085, - 0.008353390730917454, - -0.0062365154735744, - 1.0824860334396362, - -0.3704061508178711, - -1.0337258577346802, - 0.04638749733567238, - 1.163011074066162, - -0.31737643480300903, - 0.013986887410283089, - 0.19223114848136905, - -0.2260770797729492, - -0.210910826921463, - -1.0191949605941772, - 0.22356095910072327, - 0.09353553503751756, - 0.18096882104873657, - 0.14867214858531952, - 0.43408671021461487, - -0.33312076330184937, - 0.8173948526382446, - 0.6428242921829224, - 0.20215003192424777, - -0.6634518504142761, - -0.4132290482521057, - 0.29815030097961426, - -1.579406976699829, - -0.0981958732008934, - -0.03941014781594277, - 0.1709178239107132, - -0.5481140613555908, - -0.5338194966316223, - -0.3528362512588501, - -0.11561278253793716, - -0.21793591976165771, - -1.1570470333099363, - 0.2157980799674988, - 0.42083489894866943, - 0.9639263153076172, - 0.09747201204299928, - 0.15671424567699432, - 0.4034591615200043, - 0.6728067994117737, - -0.5216875672340393, - 0.09657668322324751, - -0.2416689097881317, - 0.747975766658783, - 0.1021689772605896, - 0.11652665585279463, - -1.0484966039657593, - 0.8489304780960083, - 0.7169828414916992, - -0.09012343734502792, - 
-1.3173753023147583, - 0.057890523225069046, - -0.006231260951608419, - -0.1018214002251625, - 0.936040461063385, - -0.0502331368625164, - -0.4284322261810303, - -0.38209280371665955, - -0.22668412327766416, - 0.0782942995429039, - -0.4881664514541626, - 0.9268959760665894, - 0.001867273123934865, - 0.42261114716529846, - 0.8283362984657288, - 0.4256294071674347, - -0.7965338826179504, - 0.4840078353881836, - -0.19861412048339844, - 0.33977967500686646, - -0.4604192078113556, - -0.3107339143753052, - -0.2839638590812683, - -1.5734281539916992, - 0.005220232997089624, - 0.09239906817674635, - -0.7828494906425476, - -0.1397123783826828, - 0.2576255202293396, - 0.21372435986995697, - -0.23169949650764465, - 0.4016408920288086, - -0.462497353553772, - -0.2186472862958908, - -0.5617868900299072, - -0.3649831712245941, - -1.1585862636566162, - -0.08222806453704834, - 0.931126832962036, - 0.4327389597892761, - -0.46451422572135925, - -0.5430706143379211, - -0.27434298396110535, - -0.9479129314422609, - 0.1845661848783493, - 0.3972720205783844, - 0.4883299469947815, - 1.04031240940094 + -0.05189208313822746, + -0.9273212552070618, + 0.1443813145160675, + 0.0932632014155388, + 0.2665371894836426, + 0.36266782879829407, + 0.6402910947799683, + 0.32014018297195435, + 0.030915971845388412, + -0.9312191605567932, + -0.3718109726905823, + -0.2700554132461548, + -1.1014580726623535, + 0.9154956936836244, + -0.3406888246536255, + 1.0077725648880005, + 0.6577560901641846, + -0.3955195546150207, + -0.4148270785808563, + 0.1855088472366333, + 0.5062315464019775, + -0.3632686734199524, + -0.2277890294790268, + 0.2560805082321167, + -0.3853609561920166, + -0.1604762226343155, + -0.13947471976280212, + -0.20147813856601715, + -0.4466346800327301, + -0.3761846721172333, + 0.1443382054567337, + 0.18205296993255615, + 0.49359792470932007, + -0.22538000345230105, + -0.4996317625045776, + -0.22734887897968292, + -0.6034309267997742, + -0.7857939600944519, + -0.34923747181892395, + 
-0.3466345965862274, + 0.21176661550998688, + -0.5101462006568909, + -0.3403083384037018, + 0.000315118464641273, + 0.236465722322464, + -0.10246097296476364, + -1.3013339042663574, + 0.3419138789176941, + -0.32963496446609497, + -0.0901619717478752, + -0.5426247119903564, + 0.22656650841236117, + -0.44758284091949463, + 0.14151698350906372, + -0.1089438870549202, + 0.5500766634941101, + -0.670711100101471, + -0.6227269768714905, + 0.3894464075565338, + -0.27609574794769287, + 0.7028202414512634, + -0.19697771966457367, + 0.328511506319046, + 0.5063360929489136, + 0.4065195322036743, + 0.2614171802997589, + -0.30274391174316406, + 1.0393824577331543, + -0.7742937207221985, + -0.7874112129211426, + -0.6749666929244995, + 0.5190866589546204, + 0.004123548045754433, + -0.28312963247299194, + -0.038731709122657776, + -1.0142987966537476, + -0.09519586712121964, + 0.8755272626876831, + 0.4876938760280609, + 0.7811151742935181, + 0.85174959897995, + 0.11826585978269576, + 0.5373436808586121, + 0.3649002015590668, + 0.19064077734947205, + -0.00287026260048151, + -0.7305403351783752, + -0.015206154435873032, + -0.7899249196052551, + 0.19407285749912265, + 0.08596625179052353, + -0.28976231813430786, + -0.1525907665491104, + 0.3798313438892365, + 0.050306469202041626, + -0.5697937607765198, + 0.4219021201133728, + 0.276252806186676, + 0.1559903472661972, + 0.10030482709407806, + -0.4043720066547394, + -0.1969818025827408, + 0.5739826560020447, + 0.2116064727306366, + -1.4620544910430908, + -0.7802462577819824, + -0.24739810824394223, + -0.09791352599859238, + -0.4413802027702331, + 0.21549351513385773, + -0.9520436525344848, + -0.08762510865926743, + 0.08154498040676117, + -0.6154940724372864, + -1.01079523563385, + 0.885427713394165, + 0.6967288851737976, + 0.27186504006385803, + -0.43194177746772766, + -0.11248451471328735, + 0.7576630711555481, + 0.4998855590820313, + 0.0264343973249197, + 0.9872855544090272, + 0.5634694695472717, + 0.053698331117630005, + 
0.19410227239131927, + 0.3570743501186371, + -0.23670297861099243, + -0.9114483594894408, + 0.07884842902421951, + 0.7318344116210938, + 0.44630110263824463, + 0.08745364099740982, + -0.347101628780365, + -0.4314247667789459, + -0.5060274004936218, + 0.003706763498485088, + 0.44320008158683777, + -0.00788921769708395, + -0.1368623524904251, + -0.17391923069953918, + 0.14473655819892883, + 0.10927865654230118, + 0.6974599361419678, + 0.005052129738032818, + -0.016953065991401672, + -0.1256176233291626, + -0.036742497235536575, + 0.5591985583305359, + -0.37619709968566895, + 0.22429119050502777, + 0.5403043031692505, + -0.8603790998458862, + -0.3456307053565979, + 0.9292937517166138, + 0.5074859261512756, + 0.6310645937919617, + -0.3091641068458557, + 0.46902573108673096, + 0.7891915440559387, + 0.4499550759792328, + 0.2744995653629303, + 0.2712305784225464, + -0.04349074140191078, + -0.3638863265514374, + 0.7839881777763367, + 0.7352104783058167, + -0.19457511603832245, + -0.5957832932472229, + -0.43704694509506226, + -1.084769368171692, + 0.4904985725879669, + 0.5385226011276245, + 0.1891629993915558, + 0.12338479608297348, + 0.8315675258636475, + -0.07830192148685455, + 1.0916285514831543, + -0.28066861629486084, + -1.3585069179534912, + 0.5203898549079895, + 0.08678033947944641, + -0.2566044330596924, + 0.09484415501356123, + -0.0180208683013916, + 1.0264745950698853, + -0.023572135716676712, + 0.5864979028701782, + 0.7625196576118469, + -0.2543414533138275, + -0.8877770900726318, + 0.7611982822418213, + -0.06220436468720436, + 0.937336564064026, + 0.2704363465309143, + -0.37733694911003113, + 0.5076137781143188, + -0.30641937255859375, + 0.6252772808074951, + -0.0823579877614975, + -0.03736555948853493, + 0.4131673276424408, + -0.6514252424240112, + 0.12918265163898468, + -0.4483584463596344, + 0.6750786304473877, + -0.37008383870124817, + -0.02324833907186985, + 0.38027650117874146, + -0.26374951004981995, + 0.4346931278705597, + 0.42882832884788513, + 
-0.48798441886901855, + 1.1882442235946655, + 0.5132288336753845, + 0.5284568667411804, + -0.03538886830210686, + 0.29620853066444397, + -1.0683696269989014, + 0.25936177372932434, + 0.10404160618782043, + -0.25796034932136536, + 0.027896970510482788, + -0.09225251525640488, + 1.4811025857925415, + 0.641173779964447, + -0.13838383555412292, + -0.3437179923057556, + 0.5667019486427307, + -0.5400741696357727, + 0.31090837717056274, + 0.6470608115196228, + -0.3747067153453827, + -0.7364534735679626, + -0.07431528717279434, + 0.5173454880714417, + -0.6578747034072876, + 0.7107478976249695, + -0.7918999791145325, + -0.0648345872759819, + 0.609937846660614, + -0.7329513430595398, + 0.9741371870040894, + 0.17912346124649048, + -0.02658769302070141, + 0.5162150859832764, + -0.3978803157806397, + -0.7833885550498962, + -0.6497276425361633, + -0.3898126780986786, + -0.0952848568558693, + 0.2663288116455078, + -0.1604052186012268, + 0.373076468706131, + -0.8357769250869751, + -0.05217683315277099, + -0.2680160701274872, + 0.8389158248901367, + 0.6833611130714417, + -0.6712407469749451, + 0.7406917214393616, + -0.44522786140441895, + -0.34645363688468933, + -0.27384576201438904, + -0.9878405928611756, + -0.8166060447692871, + 0.06268279999494553, + 0.38567957282066345, + -0.3274703919887543, + 0.5296315550804138, + -0.11810623109340668, + 0.23029841482639313, + 0.08616159111261368, + -0.2195747196674347, + 0.09430307894945145, + 0.4057176411151886, + 0.4892159104347229, + -0.1636916548013687, + -0.6071445345878601, + 0.41256585717201233, + 0.622254490852356, + -0.41223976016044617, + -0.6686707139015198, + -0.7474371790885925, + -0.8509522080421448, + -0.16754287481307983, + -0.9078601002693176, + -0.29653599858283997, + -0.5020652413368225, + 0.4692700505256653, + 0.01281109917908907, + -0.16071580350399017, + 0.03388889133930206, + -0.020511148497462273, + 0.5027827024459839, + -0.20729811489582065, + 0.48107290267944336, + 0.33669769763946533, + -0.5275911688804626, + 
0.48271527886390686, + 0.2738940715789795, + -0.033152539283037186, + -0.13629786670207977, + -0.05965912342071533, + -0.26200807094573975, + 0.04002794995903969, + -0.34095603227615356, + -3.986898899078369, + -0.46819332242012024, + -0.422744482755661, + -0.169097900390625, + 0.6008929014205933, + 0.058016058057546616, + -0.11401277780532836, + -0.3077819049358368, + -0.09595538675785063, + 0.6723822355270386, + 0.19367831945419312, + 0.28304359316825867, + 0.1609862744808197, + 0.7567598819732666, + 0.6889985799789429, + 0.06907720118761063, + -0.04188092052936554, + -0.7434936165809631, + 0.13321782648563385, + 0.8456063270568848, + -0.10364038497209548, + -0.45084846019744873, + -0.4758241474628449, + 0.43882066011428833, + -0.6432598829269409, + 0.7217311859130859, + -0.24189773201942444, + 0.12737572193145752, + -1.1008601188659668, + -0.3305315673351288, + 0.14614742994308472, + -0.7819333076477051, + 0.5287120342254639, + -0.055538054555654526, + 0.1877404749393463, + -0.6907662153244019, + 0.5616975426673889, + -0.4611121714115143, + -0.26109233498573303, + -0.12898315489292145, + -0.3724522292613983, + -0.7191406488418579, + -0.4425233602523804, + -0.644108235836029, + 0.8424481153488159, + 0.17532426118850708, + -0.5121750235557556, + -0.6467239260673523, + -0.0008507720194756985, + 0.7866212129592896, + -0.02644744887948036, + -0.005045140627771616, + 0.015782782807946205, + 0.16334445774555206, + -0.1913367658853531, + -0.13697923719882965, + -0.6684983372688293, + 0.18346354365348816, + -0.341105580329895, + 0.5427411198616028, + 0.3779832422733307, + -0.6778115034103394, + -0.2931850254535675, + -0.8805161714553833, + -0.4212774932384491, + -0.5368952751159668, + -1.3937891721725464, + -1.225494146347046, + 0.4276703894138336, + 1.1205668449401855, + -0.6005299687385559, + 0.15732505917549133, + -0.3914784789085388, + -1.357046604156494, + -0.4707142114639282, + -0.1497287154197693, + -0.25035548210144043, + -0.34328439831733704, + 
0.39083412289619446, + 0.1623048633337021, + -0.9275814294815063, + -0.6430015563964844, + 0.2973862886428833, + 0.5580436587333679, + -0.6232585310935974, + -0.6611042022705078, + 0.4015969038009643, + -1.0232892036437988, + -0.2585645020008087, + -0.5431421399116516, + 0.5021264553070068, + -0.48601630330085754, + -0.010242084041237833, + 0.5862035155296326, + 0.7316920161247253, + 0.4036808013916016, + 0.4269520044326782, + -0.705938458442688, + 0.7747307419776917, + 0.10164368897676468, + 0.7887958884239197, + -0.9612497091293336, + 0.12755516171455383, + 0.06812842190265656, + -0.022603651508688927, + 0.14722754061222076, + -0.5588505268096924, + -0.20689940452575684, + 0.3557641804218292, + -0.6812759637832642, + 0.2860803008079529, + -0.38954633474349976, + 0.1759403496980667, + -0.5678874850273132, + -0.1692986786365509, + -0.14578519761562347, + 0.5711379051208496, + 1.0208125114440918, + 0.7759483456611633, + -0.372348427772522, + -0.5460885763168335, + 0.7190321683883667, + -0.6914990544319153, + 0.13365162909030914, + -0.4854792356491089, + 0.4054908752441406, + 0.4502798914909363, + -0.3041122555732727, + -0.06726965308189392, + -0.05570871382951737, + -0.0455719493329525, + 0.4785125255584717, + 0.8867972493171692, + 0.4107886850833893, + 0.6121342182159424, + -0.20477132499217987, + -0.5598517656326294, + -0.6443566679954529, + -0.5905212759971619, + -0.5571200251579285, + 0.17573799192905426, + -0.28621870279312134, + 0.1685224026441574, + 0.09719007462263109, + -0.04223639518022537, + -0.28623101115226746, + -0.1449810117483139, + -0.3789580464363098, + -0.5227636098861694, + -0.049728814512491226, + 0.7849089503288269, + 0.16792525351047516, + 0.9849340915679932, + -0.6559549570083618, + 0.35723909735679626, + -0.6822739243507385, + 1.2873116731643677, + 0.19993330538272855, + 0.03512010723352432, + -0.6972134113311768, + 0.18453484773635864, + -0.2437680810689926, + 0.2156416028738022, + 0.5230382680892944, + 0.22020135819911957, + 
0.8314080238342285, + 0.15627102553844452, + -0.7330264449119568, + 0.3888184726238251, + -0.22034703195095065, + 0.5457669496536255, + -0.48084837198257446, + -0.45576658844947815, + -0.09287727624177931, + -0.06968110054731369, + 0.35125672817230225, + -0.4278119504451752, + 0.2038476765155792, + 0.11392722278833388, + 0.9433983564376832, + -0.4097744226455689, + 0.035297419875860214, + -0.4274404048919678, + -0.25100165605545044, + 1.0943366289138794, + -0.07634022831916809, + -0.2925529479980469, + -0.7512530088424683, + 0.2649727463722229, + -0.4078235328197479, + -0.3372223973274231, + 0.05190162733197212, + 0.005654910113662481, + -0.0001571219472680241, + -0.35445958375930786, + -0.7837416529655457, + 0.1500556766986847, + 0.4383024573326111, + 0.6099548935890198, + 0.05951934307813645, + -0.21325334906578064, + 0.0199207104742527, + -0.22704418003559113, + -0.6481077671051025, + 0.37442275881767273, + -1.015955924987793, + 0.38637226819992065, + -0.06489371508359909, + -0.494120329618454, + 0.3469836115837097, + 0.15402406454086304, + -0.7660972476005554, + -0.7053225040435791, + -0.25964751839637756, + 0.014004424214363098, + -0.2860170006752014, + -0.17565494775772095, + -0.45117494463920593, + -0.0031954257283359766, + 0.09676837921142578, + -0.514464259147644, + 0.41698193550109863, + -0.21642713248729703, + -0.5398141145706177, + -0.3647628426551819, + 0.37005379796028137, + 0.239425927400589, + -0.08833975344896317, + 0.934946596622467, + -0.48340797424316406, + 0.6241437792778015, + -0.7253676652908325, + -0.04303571209311485, + 1.1125205755233765, + -0.15692919492721558, + -0.2914651036262512, + -0.5117168426513672, + 0.21365483105182648, + 0.4924402534961701, + 0.5269662141799927, + 0.0352792888879776, + -0.149167999625206, + -0.6019760370254517, + 0.08245442807674408, + 0.4900692105293274, + 0.518824577331543, + -0.00005570516441366635, + -0.553304135799408, + 0.22217543423175812, + 0.5047767758369446, + 0.135724738240242, + 1.1511540412902832, + 
-0.3541218340396881, + -0.9712511897087096, + 0.8353699445724487, + -0.39227569103240967, + -0.9117669463157654, + -0.26349931955337524, + 0.05597023293375969, + 0.20695461332798004, + 0.3178807199001312, + 1.0663238763809204, + 0.5062212347984314, + 0.7288597822189331, + 0.09899299591779707, + 0.553720235824585, + 0.675009548664093, + -0.20067055523395536, + 0.3138423264026642, + -0.6886593103408813, + -0.2910398542881012, + -1.3186300992965698, + -0.4684459865093231, + -0.095743365585804, + -0.1257995069026947, + -0.4858281314373016, + -0.4935407340526581, + -0.3266896903514862, + -0.3928797245025635, + -0.40803104639053345, + -0.9975396394729614, + 0.4229583740234375, + 0.37309643626213074, + 0.4431034922599793, + 0.30364808440208435, + -0.3765178918838501, + 0.5616499185562134, + 0.16904796659946442, + -0.7343707084655762, + 0.2560209631919861, + 0.6166825294494629, + 0.3200829327106476, + -0.4483652710914612, + 0.16224201023578644, + -0.31495288014411926, + -0.42713335156440735, + 0.7270734906196594, + 0.7049484848976135, + -0.0571461021900177, + 0.04477125033736229, + -0.6647796034812927, + 1.183672308921814, + 0.36199676990509033, + 0.046881116926670074, + 0.4515796303749085, + 0.9278061985969543, + 0.31471705436706543, + -0.7073333859443665, + -0.3443860113620758, + 0.5440067052841187, + -0.15020819008350372, + -0.541202962398529, + 0.5203295946121216, + 1.2192286252975464, + -0.9983593225479126, + -0.18758884072303772, + 0.2758221924304962, + -0.6511523723602295, + -0.1584404855966568, + -0.236241415143013, + 0.2692437767982483, + -0.4941152036190033, + 0.4987454116344452, + -0.3331359028816223, + 0.3163745701313019, + 0.745529294013977, + -0.2905873656272888, + 0.13602906465530396, + 0.4679684340953827, + 1.0555986166000366, + 1.075700044631958, + 0.5368486046791077, + -0.5118206739425659, + 0.8668332099914551, + -0.5726966857910156, + -0.7811751961708069, + 0.1938626915216446, + -0.1929349899291992, + 0.1757766306400299, + 0.6384295225143433, + 
0.26462844014167786, + 0.9542630314826964, + 0.19313029944896695, + 1.264248013496399, + -0.6304428577423096, + 0.0487106591463089, + -0.16211535036563873, + -0.7894763350486755, + 0.3582514822483063, + -0.04153040423989296, + 0.635784387588501, + 0.6554391980171204, + -0.47010496258735657, + -0.8302040696144104, + -0.1350124627351761, + 0.2568812072277069, + 0.13614831864833832, + -0.2563649117946625, + -1.0434694290161133, + 0.3232482671737671, + 0.47882452607154846, + 0.4298652410507202, + 1.0563770532608032, + -0.28917592763900757, + -0.8533256649971008, + 0.10648339986801147, + 0.6376127004623413, + -0.20832888782024384, + 0.2370245456695557, + 0.0018312990432605147, + -0.2034837007522583, + 0.01051164511591196, + -1.105310082435608, + 0.29724350571632385, + 0.15604574978351593, + 0.1973688006401062, + 0.44394731521606445, + 0.3974513411521912, + -0.13625948131084442, + 0.9571986198425292, + 0.2257384955883026, + 0.2323588728904724, + -0.5583669543266296, + -0.7854922413825989, + 0.1647188365459442, + -1.6098142862319946, + 0.318587988615036, + -0.13399995863437653, + -0.2172701060771942, + -0.767514705657959, + -0.5813586711883545, + -0.3195130527019501, + -0.04894036799669266, + 0.2929930090904236, + -0.8213384747505188, + 0.07181350141763687, + 0.7469993829727173, + 0.6407455801963806, + 0.16365697979927063, + 0.7870153188705444, + 0.6524736881256104, + 0.6399973630905151, + -0.04992736503481865, + -0.03959266096353531, + -0.2512352466583252, + 0.8448855876922607, + -0.1422702670097351, + 0.1216789186000824, + -1.2647287845611572, + 0.5931149125099182, + 0.7186052203178406, + -0.06118432432413101, + -1.1942816972732544, + -0.17677085101604462, + 0.31543800234794617, + -0.32252824306488037, + 0.8255583047866821, + -0.14529970288276672, + -0.2695446312427521, + -0.33378756046295166, + -0.1653425395488739, + 0.1454019844532013, + -0.3920115828514099, + 0.912214994430542, + -0.7279734015464783, + 0.7374742031097412, + 0.933980405330658, + 0.13429680466651917, + 
-0.514870285987854, + 0.3989711999893189, + -0.11613689363002776, + 0.4022413492202759, + -0.9990655779838562, + -0.33749932050704956, + -0.4334589838981629, + -1.376373291015625, + -0.2993924915790558, + -0.09454808384180068, + -0.01314175222069025, + -0.001090060803107917, + 0.2137461006641388, + 0.2938512861728668, + 0.17508235573768616, + 0.8260607123374939, + -0.7218498587608337, + 0.2414487451314926, + -0.47296759486198425, + -0.3002610504627228, + -1.238540768623352, + 0.08663805574178696, + 0.6805586218833923, + 0.5909030437469482, + -0.42807504534721375, + -0.22887496650218964, + 0.47537800669670105, + -1.0474627017974854, + 0.6338009238243103, + 0.06548397243022919, + 0.4971011281013489, + 1.3484878540039063 ] ], "regenerate": true diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap index 4bd0e2c3e..e5d28e450 100644 --- a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap @@ -3,778 +3,783 @@ source: dump/src/reader/mod.rs expression: document --- { - "id": "e0", - "desc": "overriden vector", + "id": "e1", + "desc": "natural vector", "_vectors": { - "default": [ - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 
0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, 
- 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 
0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1, - 0.1 - ] + "default": { + "embeddings": [ + [ + -0.2979458272457123, + -0.5288640856742859, + -0.019957859069108963, + -0.18495318293571472, + 0.7429973483085632, + 0.5238497257232666, + 0.432366281747818, + 0.32744166254997253, + 0.0020762972999364138, + -0.9507834911346436, + -0.35097137093544006, + 0.08469701558351517, + -1.4176613092422483, + 0.4647577106952667, + -0.69340580701828, + 1.0372896194458008, + 0.3716741800308227, + 0.06031008064746857, + -0.6152024269104004, + 0.007914665155112743, + 0.7954924702644348, + -0.20773003995418549, + 0.09376765787601472, + 0.04508133605122566, + -0.2084471583366394, + -0.1518009901046753, + 0.018195509910583496, + -0.07044368237257004, + -0.18119366466999057, + -0.4480230510234833, + 0.3822529911994934, + 0.1911812424659729, + 0.4674372375011444, + 0.06963984668254852, + -0.09341949224472046, + 0.005675444379448891, + -0.6774799227714539, + -0.7066726684570313, + -0.39256376028060913, + 0.04005039855837822, + 0.2084812968969345, + -0.7872875928878784, + -0.8205880522727966, + 0.2919981777667999, + -0.06004738807678223, + -0.4907574355602264, + -1.5937862396240234, + 0.24249385297298431, + -0.14709846675395966, + -0.11860740929841997, + -0.8299489617347717, + 0.472964346408844, + -0.497518390417099, + -0.22205302119255063, + 
-0.4196169078350067, + 0.32697558403015137, + -0.360930860042572, + -0.9789686799049376, + 0.1887447088956833, + -0.403737336397171, + 0.18524253368377688, + 0.3768732249736786, + 0.3666233420372009, + 0.3511938452720642, + 0.6985810995101929, + 0.41721710562705994, + 0.09754953533411026, + 0.6204307079315186, + -1.0762996673583984, + -0.06263761967420578, + -0.7376511693000793, + 0.6849768161773682, + -0.1745152473449707, + -0.40449759364128113, + 0.20757411420345304, + -0.8424443006515503, + 0.330015629529953, + 0.3489064872264862, + 1.0954371690750122, + 0.8487558960914612, + 1.1076823472976685, + 0.61430823802948, + 0.4155903458595276, + 0.4111340939998626, + 0.05753209814429283, + -0.06429877132177353, + -0.765606164932251, + -0.41703930497169495, + -0.508820652961731, + 0.19859947264194489, + -0.16607828438282013, + -0.28112146258354187, + 0.11032675206661224, + 0.38809511065483093, + -0.36498191952705383, + -0.48671194911003113, + 0.6755134463310242, + 0.03958442434668541, + 0.4478721618652344, + -0.10335399955511092, + -0.9546685814857484, + -0.6087718605995178, + 0.17498846352100372, + 0.08320838958024979, + -1.4478336572647097, + -0.605027437210083, + -0.5867993235588074, + -0.14711688458919525, + -0.5447602272033691, + -0.026259321719408035, + -0.6997418403625488, + -0.07349082082509995, + 0.10638900846242905, + -0.7133527398109436, + -0.9396815299987792, + 1.087092399597168, + 1.1885089874267578, + 0.4011896848678589, + -0.4089202582836151, + -0.10938972979784012, + 0.6726722121238708, + 0.24576938152313232, + -0.24247920513153076, + 1.1499971151351929, + 0.47813335061073303, + -0.05331678315997124, + 0.32338133454322815, + 0.4870913326740265, + -0.23144258558750153, + -1.2023426294326782, + 0.2349330335855484, + 1.080536961555481, + 0.29334118962287903, + 0.391574501991272, + -0.15818795561790466, + -0.2948290705680847, + -0.024689948186278343, + 0.06602869182825089, + 0.5937030911445618, + -0.047901444137096405, + -0.512734591960907, + 
-0.35780075192451477, + 0.28751692175865173, + 0.4298716187477112, + 0.9242428541183472, + -0.17208744585514069, + 0.11515070497989656, + -0.0335976779460907, + -0.3422986567020416, + 0.5344581604003906, + 0.19895796477794647, + 0.33001241087913513, + 0.6390730142593384, + -0.6074934005737305, + -0.2553696632385254, + 0.9644920229911804, + 0.2699219584465027, + 0.6403993368148804, + -0.6380003690719604, + -0.027310986071825027, + 0.638815701007843, + 0.27719101309776306, + -0.13553589582443237, + 0.750195324420929, + 0.1224869191646576, + -0.20613941550254825, + 0.8444448709487915, + 0.16200250387191772, + -0.24750925600528717, + -0.739950954914093, + -0.28443849086761475, + -1.176282525062561, + 0.516107976436615, + 0.3774825632572174, + 0.10906043648719788, + 0.07962015271186829, + 0.7384604215621948, + -0.051241904497146606, + 1.1730090379714966, + -0.4828610122203827, + -1.404372215270996, + 0.8811132311820984, + -0.3839482367038727, + 0.022516896948218346, + -0.0491158664226532, + -0.43027013540267944, + 1.2049334049224854, + -0.27309560775756836, + 0.6883630752563477, + 0.8264574408531189, + -0.5020735263824463, + -0.4874092042446137, + 0.6007202863693237, + -0.4965405762195587, + 1.1302915811538696, + 0.032572727650403976, + -0.3731859028339386, + 0.658271849155426, + -0.9023059010505676, + 0.7400162220001221, + 0.014550759457051754, + -0.19699542224407196, + 0.2319706380367279, + -0.789058268070221, + -0.14905710518360138, + -0.5826214551925659, + 0.207652747631073, + -0.4507439732551574, + -0.3163885474205017, + 0.3604124188423157, + -0.45119962096214294, + 0.3428427278995514, + 0.3005594313144684, + -0.36026081442832947, + 1.1014249324798584, + 0.40884315967559814, + 0.34991952776908875, + -0.1806638240814209, + 0.27440476417541504, + -0.7118373513221741, + 0.4645499587059021, + 0.214790478348732, + -0.2343102991580963, + 0.10500429570674896, + -0.28034430742263794, + 1.2267805337905884, + 1.0561333894729614, + -0.497364342212677, + -0.6143305897712708, + 
0.24963727593421936, + -0.33136463165283203, + -0.01473914459347725, + 0.495918869972229, + -0.6985538005828857, + -1.0033197402954102, + 0.35937801003456116, + 0.6325868368148804, + -0.6808838844299316, + 1.0354058742523191, + -0.7214401960372925, + -0.33318862318992615, + 0.874398410320282, + -0.6594992280006409, + 0.6830640435218811, + -0.18534131348133087, + 0.024834271520376205, + 0.19901277124881744, + -0.5992477536201477, + -1.2126628160476685, + -0.9245557188987732, + -0.3898217976093292, + -0.1286519467830658, + 0.4217943847179413, + -0.1143646091222763, + 0.5630772709846497, + -0.5240639448165894, + 0.21152715384960177, + -0.3792001008987427, + 0.8266305327415466, + 1.170984387397766, + -0.8072142004966736, + 0.11382893472909927, + -0.17953898012638092, + -0.1789460331201553, + -0.15078622102737427, + -1.2082908153533936, + -0.7812382578849792, + -0.10903695970773696, + 0.7303897142410278, + -0.39054441452026367, + 0.19511254131793976, + -0.09121843427419662, + 0.22400228679180145, + 0.30143046379089355, + 0.1141919493675232, + 0.48112115263938904, + 0.7307931780815125, + 0.09701362252235413, + -0.2795647978782654, + -0.3997688889503479, + 0.5540812611579895, + 0.564578115940094, + -0.40065160393714905, + -0.3629159033298493, + -0.3789091110229492, + -0.7298538088798523, + -0.6996853351593018, + -0.4477842152118683, + -0.289089560508728, + -0.6430277824401855, + 0.2344944179058075, + 0.3742927014827728, + -0.5079357028007507, + 0.28841453790664673, + 0.06515737622976303, + 0.707315981388092, + 0.09498685598373412, + 0.8365515470504761, + 0.10002726316452026, + -0.7695478200912476, + 0.6264724135398865, + 0.7562043070793152, + -0.23112858831882477, + -0.2871039807796478, + -0.25010058283805847, + 0.2783474028110504, + -0.03224996477365494, + -0.9119359850883484, + -3.6940200328826904, + -0.5099936127662659, + -0.1604711413383484, + 0.17453284561634064, + 0.41759559512138367, + 0.1419190913438797, + -0.11362407356500626, + -0.33312007784843445, + 
0.11511333286762238, + 0.4667884409427643, + -0.0031647447030991316, + 0.15879854559898376, + 0.3042248487472534, + 0.5404849052429199, + 0.8515422344207764, + 0.06286454200744629, + 0.43790125846862793, + -0.8682025074958801, + -0.06363756954669952, + 0.5547921657562256, + -0.01483887154608965, + -0.07361344993114471, + -0.929947018623352, + 0.3502565622329712, + -0.5080993175506592, + 1.0380364656448364, + -0.2017953395843506, + 0.21319580078125, + -1.0763001441955566, + -0.556368887424469, + 0.1949922740459442, + -0.6445739269256592, + 0.6791343688964844, + 0.21188358962535855, + 0.3736183941364288, + -0.21800459921360016, + 0.7597446441650391, + -0.3732394874095917, + -0.4710160195827484, + 0.025146087631583217, + 0.05341297015547752, + -0.9522109627723694, + -0.6000866889953613, + -0.08469046652317047, + 0.5966026186943054, + 0.3444081246852875, + -0.461188405752182, + -0.5279349088668823, + 0.10296865552663804, + 0.5175143480300903, + -0.20671147108078003, + 0.13392412662506104, + 0.4812754988670349, + 0.2993808686733246, + -0.3005635440349579, + 0.5141698122024536, + -0.6239235401153564, + 0.2877119481563568, + -0.4452739953994751, + 0.5621107816696167, + 0.5047508478164673, + -0.4226335883140564, + -0.18578553199768064, + -1.1967322826385498, + 0.28178197145462036, + -0.8692031502723694, + -1.1812998056411743, + -1.4526212215423584, + 0.4645712077617645, + 0.9327932000160216, + -0.6560136675834656, + 0.461549699306488, + -0.5621527433395386, + -1.328449010848999, + -0.08676894754171371, + 0.00021918353741057217, + -0.18864136934280396, + 0.1259666532278061, + 0.18240638077259064, + -0.14919660985469818, + -0.8965857625007629, + -0.7539900541305542, + 0.013973715715110302, + 0.504276692867279, + -0.704748272895813, + -0.6428424119949341, + 0.6303996443748474, + -0.5404738187789917, + -0.31176653504371643, + -0.21262824535369873, + 0.18736739456653595, + -0.7998970746994019, + 0.039946746081113815, + 0.7390344738960266, + 0.4283199906349182, + 
0.3795057237148285, + 0.07204607129096985, + -0.9230587482452391, + 0.9440426230430604, + 0.26272690296173096, + 0.5598306655883789, + -1.0520871877670288, + -0.2677186131477356, + -0.1888762265443802, + 0.30426350235939026, + 0.4746131896972656, + -0.5746733546257019, + -0.4197768568992615, + 0.8565112948417664, + -0.6767723560333252, + 0.23448683321475983, + -0.2010004222393036, + 0.4112907350063324, + -0.6497949957847595, + -0.418667733669281, + -0.4950824975967407, + 0.44438859820365906, + 1.026281714439392, + 0.482397586107254, + -0.26220494508743286, + -0.3640787005424499, + 0.5907743573188782, + -0.8771642446517944, + 0.09708411991596222, + -0.3671700060367584, + 0.4331349730491638, + 0.619417667388916, + -0.2684665620326996, + -0.5123821496963501, + -0.1502324342727661, + -0.012190685607492924, + 0.3580845892429352, + 0.8617186546325684, + 0.3493645489215851, + 1.0270192623138428, + 0.18297909200191495, + -0.5881339311599731, + -0.1733516901731491, + -0.5040576457977295, + -0.340370237827301, + -0.26767754554748535, + -0.28570041060447693, + -0.032928116619586945, + 0.6029254794120789, + 0.17397655546665192, + 0.09346921741962431, + 0.27815181016921997, + -0.46699589490890503, + -0.8148876428604126, + -0.3964351713657379, + 0.3812595009803772, + 0.13547226786613464, + 0.7126688361167908, + -0.3473474085330963, + -0.06573959439992905, + -0.6483767032623291, + 1.4808889627456665, + 0.30924928188323975, + -0.5085946917533875, + -0.8613000512123108, + 0.3048902451992035, + -0.4241599142551422, + 0.15909206867218018, + 0.5764641761779785, + -0.07879110425710678, + 1.015336513519287, + 0.07599356025457382, + -0.7025855779647827, + 0.30047643184661865, + -0.35094937682151794, + 0.2522146999835968, + -0.2338722199201584, + -0.8326804637908936, + -0.13695412874221802, + -0.03452421352267265, + 0.47974953055381775, + -0.18385636806488037, + 0.32438594102859497, + 0.1797013282775879, + 0.787494957447052, + -0.12579888105392456, + -0.07507286965847015, + 
-0.4389670491218567, + 0.2720070779323578, + 0.8138866424560547, + 0.01974171027541161, + -0.3057698905467987, + -0.6709924936294556, + 0.0885881632566452, + -0.2862754464149475, + 0.03475658595561981, + -0.1285519152879715, + 0.3838353455066681, + -0.2944154739379883, + -0.4204859137535095, + -0.4416137933731079, + 0.13426260650157928, + 0.36733248829841614, + 0.573428750038147, + -0.14928072690963745, + -0.026076916605234143, + 0.33286052942276, + -0.5340145826339722, + -0.17279052734375, + -0.01154550164937973, + -0.6620771884918213, + 0.18390542268753052, + -0.08265615254640579, + -0.2489682286977768, + 0.2429984211921692, + -0.044153645634651184, + -0.986578404903412, + -0.33574509620666504, + -0.5387663841247559, + 0.19767941534519196, + 0.12540718913078308, + -0.3403128981590271, + -0.4154576361179352, + 0.17275673151016235, + 0.09407442808151244, + -0.5414086580276489, + 0.4393929839134216, + 0.1725579798221588, + -0.4998118281364441, + -0.6926208138465881, + 0.16552448272705078, + 0.6659538149833679, + -0.10949844866991044, + 0.986426830291748, + 0.01748848147690296, + 0.4003709554672241, + -0.5430638194084167, + 0.35347291827201843, + 0.6887399554252625, + 0.08274628221988678, + 0.13407137989997864, + -0.591465950012207, + 0.3446292281150818, + 0.6069018244743347, + 0.1935492902994156, + -0.0989871397614479, + 0.07008486241102219, + -0.8503749370574951, + -0.09507356584072112, + 0.6259510517120361, + 0.13934025168418884, + 0.06392545253038406, + -0.4112265408039093, + -0.08475656062364578, + 0.4974113404750824, + -0.30606114864349365, + 1.111435890197754, + -0.018766529858112335, + -0.8422622680664063, + 0.4325508773326874, + -0.2832120656967163, + -0.4859798848628998, + -0.41498348116874695, + 0.015977520495653152, + 0.5292825698852539, + 0.4538311660289765, + 1.1328668594360352, + 0.22632671892642975, + 0.7918671369552612, + 0.33401933312416077, + 0.7306135296821594, + 0.3548600673675537, + 0.12506209313869476, + 0.8573207855224609, + 
-0.5818327069282532, + -0.6953738927841187, + -1.6171947717666626, + -0.1699674427509308, + 0.6318262815475464, + -0.05671752244234085, + -0.28145185112953186, + -0.3976689279079437, + -0.2041076272726059, + -0.5495951175689697, + -0.5152917504310608, + -0.9309796094894408, + 0.101932130753994, + 0.1367802917957306, + 0.1490798443555832, + 0.5304336547851563, + -0.5082434415817261, + 0.06688683480024338, + 0.14657628536224365, + -0.782435953617096, + 0.2962816655635834, + 0.6965363621711731, + 0.8496337532997131, + -0.3042965829372406, + 0.04343798756599426, + 0.0330701619386673, + -0.5662598013877869, + 1.1086925268173218, + 0.756072998046875, + -0.204134538769722, + 0.2404300570487976, + -0.47848284244537354, + 1.3659011125564575, + 0.5645433068275452, + -0.15836156904697418, + 0.43395575881004333, + 0.5944653749465942, + 1.0043466091156006, + -0.49446743726730347, + -0.5954391360282898, + 0.5341240763664246, + 0.020598189905285835, + -0.4036853015422821, + 0.4473709762096405, + 1.1998231410980225, + -0.9317775368690492, + -0.23321466147899628, + 0.2052552700042725, + -0.7423108816146851, + -0.19917210936546328, + -0.1722569614648819, + -0.034072667360305786, + -0.00671181408688426, + 0.46396249532699585, + -0.1372445821762085, + 0.053376372903585434, + 0.7392690777778625, + -0.38447609543800354, + 0.07497968524694443, + 0.5197252631187439, + 1.3746477365493774, + 0.9060075879096984, + 0.20000585913658145, + -0.4053704142570496, + 0.7497360110282898, + -0.34087055921554565, + -1.101803183555603, + 0.273650586605072, + -0.5125769376754761, + 0.22472351789474487, + 0.480757474899292, + -0.19845178723335263, + 0.8857700824737549, + 0.30752456188201904, + 1.1109285354614258, + -0.6768012642860413, + 0.524367094039917, + -0.22495046257972717, + -0.4224412739276886, + 0.40753406286239624, + -0.23133376240730288, + 0.3297771215438843, + 0.4905449151992798, + -0.6813114285469055, + -0.7543983459472656, + -0.5599071383476257, + 0.14351597428321838, + 
-0.029278717935085297, + -0.3970443606376648, + -0.303079217672348, + 0.24161772429943085, + 0.008353390730917454, + -0.0062365154735744, + 1.0824860334396362, + -0.3704061508178711, + -1.0337258577346802, + 0.04638749733567238, + 1.163011074066162, + -0.31737643480300903, + 0.013986887410283089, + 0.19223114848136905, + -0.2260770797729492, + -0.210910826921463, + -1.0191949605941772, + 0.22356095910072327, + 0.09353553503751756, + 0.18096882104873657, + 0.14867214858531952, + 0.43408671021461487, + -0.33312076330184937, + 0.8173948526382446, + 0.6428242921829224, + 0.20215003192424777, + -0.6634518504142761, + -0.4132290482521057, + 0.29815030097961426, + -1.579406976699829, + -0.0981958732008934, + -0.03941014781594277, + 0.1709178239107132, + -0.5481140613555908, + -0.5338194966316223, + -0.3528362512588501, + -0.11561278253793716, + -0.21793591976165771, + -1.1570470333099363, + 0.2157980799674988, + 0.42083489894866943, + 0.9639263153076172, + 0.09747201204299928, + 0.15671424567699432, + 0.4034591615200043, + 0.6728067994117737, + -0.5216875672340393, + 0.09657668322324751, + -0.2416689097881317, + 0.747975766658783, + 0.1021689772605896, + 0.11652665585279463, + -1.0484966039657593, + 0.8489304780960083, + 0.7169828414916992, + -0.09012343734502792, + -1.3173753023147583, + 0.057890523225069046, + -0.006231260951608419, + -0.1018214002251625, + 0.936040461063385, + -0.0502331368625164, + -0.4284322261810303, + -0.38209280371665955, + -0.22668412327766416, + 0.0782942995429039, + -0.4881664514541626, + 0.9268959760665894, + 0.001867273123934865, + 0.42261114716529846, + 0.8283362984657288, + 0.4256294071674347, + -0.7965338826179504, + 0.4840078353881836, + -0.19861412048339844, + 0.33977967500686646, + -0.4604192078113556, + -0.3107339143753052, + -0.2839638590812683, + -1.5734281539916992, + 0.005220232997089624, + 0.09239906817674635, + -0.7828494906425476, + -0.1397123783826828, + 0.2576255202293396, + 0.21372435986995697, + -0.23169949650764465, + 
0.4016408920288086, + -0.462497353553772, + -0.2186472862958908, + -0.5617868900299072, + -0.3649831712245941, + -1.1585862636566162, + -0.08222806453704834, + 0.931126832962036, + 0.4327389597892761, + -0.46451422572135925, + -0.5430706143379211, + -0.27434298396110535, + -0.9479129314422609, + 0.1845661848783493, + 0.3972720205783844, + 0.4883299469947815, + 1.04031240940094 + ] + ], + "regenerate": true + } } } diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap new file mode 100644 index 000000000..4bd0e2c3e --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-9.snap @@ -0,0 +1,780 @@ +--- +source: dump/src/reader/mod.rs +expression: document +--- +{ + "id": "e0", + "desc": "overriden vector", + "_vectors": { + "default": [ + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, 
+ 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ] + } +} diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index 679555a89..04b6dc266 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -27,6 +27,7 @@ pub enum Condition<'a> { LowerThanOrEqual(Token<'a>), Between { from: Token<'a>, to: Token<'a> }, Contains { keyword: Token<'a>, word: Token<'a> }, + StartsWith { keyword: Token<'a>, word: Token<'a> }, } /// condition = value ("==" | ">" ...) value @@ -121,6 +122,34 @@ pub fn parse_not_contains(input: Span) -> IResult { )) } +/// starts with = value "STARTS WITH" value +pub fn parse_starts_with(input: Span) -> IResult { + let (input, (fid, starts_with, value)) = + tuple((parse_value, tag("STARTS WITH"), cut(parse_value)))(input)?; + Ok(( + input, + FilterCondition::Condition { + fid, + op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value }, + }, + )) +} + +/// not starts with = value "NOT" WS+ "STARTS WITH" value +pub fn parse_not_starts_with(input: Span) -> IResult { + let keyword = tuple((tag("NOT"), multispace1, tag("STARTS WITH"))); + let (input, (fid, (_not, _spaces, starts_with), value)) = + tuple((parse_value, keyword, cut(parse_value)))(input)?; + + Ok(( + input, + FilterCondition::Not(Box::new(FilterCondition::Condition { + fid, + op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value }, + })), + )) +} + /// to = value value "TO" WS+ value pub fn parse_to(input: Span) -> IResult { let (input, (key, from, _, _, to)) = diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index f530cc690..122396b87 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -146,7 +146,7 @@ impl<'a> Display for
Error<'a> { } ErrorKind::InvalidPrimary => { let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)? } ErrorKind::InvalidEscapedNumber => { writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)? diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index d06154f25..cfe009acb 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -49,7 +49,7 @@ use std::fmt::Debug; pub use condition::{parse_condition, parse_to, Condition}; use condition::{ parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, - parse_is_null, parse_not_contains, parse_not_exists, + parse_is_null, parse_not_contains, parse_not_exists, parse_not_starts_with, parse_starts_with, }; use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; @@ -166,7 +166,8 @@ impl<'a> FilterCondition<'a> { | Condition::LowerThan(_) | Condition::LowerThanOrEqual(_) | Condition::Between { .. 
} => None, - Condition::Contains { keyword, word: _ } => Some(keyword), + Condition::Contains { keyword, word: _ } + | Condition::StartsWith { keyword, word: _ } => Some(keyword), }, FilterCondition::Not(this) => this.use_contains_operator(), FilterCondition::Or(seq) | FilterCondition::And(seq) => { @@ -484,6 +485,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult { parse_to, parse_contains, parse_not_contains, + parse_starts_with, + parse_not_starts_with, // the next lines are only for error handling and are written at the end to have the less possible performance impact parse_geo, parse_geo_distance, @@ -567,6 +570,7 @@ impl<'a> std::fmt::Display for Condition<'a> { Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), Condition::Between { from, to } => write!(f, "{from} TO {to}"), Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"), + Condition::StartsWith { word, keyword: _ } => write!(f, "STARTS WITH {word}"), } } } @@ -680,6 +684,13 @@ pub mod tests { insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}"); insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})"); + // Test STARTS WITH + NOT STARTS WITH + insta::assert_snapshot!(p("subscribers STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}"); + insta::assert_snapshot!(p("NOT subscribers STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})"); + insta::assert_snapshot!(p("subscribers NOT STARTS WITH hel"), @"NOT ({subscribers} STARTS WITH {hel})"); + insta::assert_snapshot!(p("NOT subscribers NOT STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}"); + insta::assert_snapshot!(p("subscribers NOT STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})"); + // Test nested NOT insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); @@ -751,7 +762,7 @@ pub mod tests { "###); 
insta::assert_snapshot!(p("'OR'"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. 1:5 'OR' "###); @@ -761,12 +772,12 @@ pub mod tests { "###); insta::assert_snapshot!(p("channel Ponce"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. 1:14 channel Ponce "###); insta::assert_snapshot!(p("channel = Ponce OR"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. 
19:19 channel = Ponce OR "###); @@ -851,12 +862,12 @@ pub mod tests { "###); insta::assert_snapshot!(p("colour NOT EXIST"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. 1:17 colour NOT EXIST "###); insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. 1:23 subscribers 100 TO1000 "###); @@ -919,35 +930,35 @@ pub mod tests { "###); insta::assert_snapshot!(p(r#"value NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. 
1:11 value NULL "###); insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. 1:15 value NOT NULL "###); insta::assert_snapshot!(p(r#"value EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. 1:12 value EMPTY "###); insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. 
1:16 value NOT EMPTY "###); insta::assert_snapshot!(p(r#"value IS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. 1:9 value IS "###); insta::assert_snapshot!(p(r#"value IS NOT"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. 1:13 value IS NOT "###); insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. 
1:16 value IS EXISTS "###); insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. 1:20 value IS NOT EXISTS "###); } diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 06ec1daef..5912f6900 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -212,6 +212,8 @@ fn is_keyword(s: &str) -> bool { | "NULL" | "EMPTY" | "CONTAINS" + | "STARTS" + | "WITH" | "_geoRadius" | "_geoBoundingBox" ) diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 6f099a025..432a86382 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -40,7 +40,7 @@ ureq = "2.10.0" uuid = { version = "1.10.0", features = ["serde", "v4"] } [dev-dependencies] -arroy = "0.4.0" +arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } big_s = "1.0.2" crossbeam = "0.8.4" insta = { version = "1.39.0", features = ["json", "redactions"] } diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index c998ff444..f4ac80511 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -87,7 +87,7 @@ impl RoFeatures { Ok(()) } else { Err(FeatureNotEnabledError { - disabled_action: "Using `CONTAINS` in a filter", + disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter", feature: "contains filter", issue_link: "https://github.com/orgs/meilisearch/discussions/763", } diff --git 
a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 753e8c179..fe8244f9b 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1477,7 +1477,7 @@ impl IndexScheduler { .map( |IndexEmbeddingConfig { name, - config: milli::vector::EmbeddingConfig { embedder_options, prompt }, + config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized }, .. }| { let prompt = @@ -1486,7 +1486,10 @@ impl IndexScheduler { { let embedders = self.embedders.read().unwrap(); if let Some(embedder) = embedders.get(&embedder_options) { - return Ok((name, (embedder.clone(), prompt))); + return Ok(( + name, + (embedder.clone(), prompt, quantized.unwrap_or_default()), + )); } } @@ -1500,7 +1503,7 @@ impl IndexScheduler { let mut embedders = self.embedders.write().unwrap(); embedders.insert(embedder_options, embedder.clone()); } - Ok((name, (embedder, prompt))) + Ok((name, (embedder, prompt, quantized.unwrap_or_default()))) }, ) .collect(); @@ -5197,7 +5200,7 @@ mod tests { let simple_hf_name = name.clone(); let configs = index_scheduler.embedders(configs).unwrap(); - let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap(); + let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap(); let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap(); let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap(); @@ -5519,6 +5522,7 @@ mod tests { 400, ), }, + quantized: None, }, user_provided: RoaringBitmap<[1, 2]>, }, @@ -5531,28 +5535,8 @@ mod tests { // the document with the id 3 should keep its original embedding let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); - let mut embeddings = Vec::new(); - - 'vectors: for i in 0..=u8::MAX { - let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => 
Err(e), - }) - .transpose(); - - let Some(reader) = reader else { - break 'vectors; - }; - - let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap(); - if let Some(embedding) = embedding { - embeddings.push(embedding) - } else { - break 'vectors; - } - } + let embeddings = index.embeddings(&rtxn, docid).unwrap(); + let embeddings = &embeddings["my_doggo_embedder"]; snapshot!(embeddings.len(), @"1"); assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); @@ -5737,6 +5721,7 @@ mod tests { 400, ), }, + quantized: None, }, user_provided: RoaringBitmap<[0]>, }, @@ -5780,6 +5765,7 @@ mod tests { 400, ), }, + quantized: None, }, user_provided: RoaringBitmap<[]>, }, diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap index 5eccdc57a..41cfcfdab 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, 
details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: 
Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap index e7c7382d5..e6d0d8232 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: 
succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, 
document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, 
headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { 
index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap index ac3b3f2d9..bd4cf0c09 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, 
distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: 
Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: 
NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap index e67ef0e51..746c7c870 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 
{uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My 
super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap index 84d8486e1..15cfd732a 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: 
NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, 
distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: 
"doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap index 6ef17024d..9b5c6ce4c 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] 
---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, 
revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the 
{{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap index 
cf710b40f..37f0a062d 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, 
url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), 
request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap index 9b5b465ab..3906fc6fc 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), 
filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, 
ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 73306c4dc..cb4937e57 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -66,3 +66,5 @@ khmer = ["milli/khmer"] vietnamese = ["milli/vietnamese"] # force swedish character recomposition swedish-recomposition = ["milli/swedish-recomposition"] +# force german character recomposition +german = ["milli/german"] diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 0099cada5..f755998a1 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -238,8 +238,14 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederationOptions , 
InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ; +InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; @@ -389,7 +395,10 @@ impl ErrorCode for milli::Error { | UserError::InvalidSettingsDimensions { .. } | UserError::InvalidUrl { .. } | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } - | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, + | UserError::InvalidPrompt(_) + | UserError::InvalidDisableBinaryQuantization { .. } => { + Code::InvalidSettingsEmbedders + } UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, diff --git a/meilisearch-types/src/index_uid.rs b/meilisearch-types/src/index_uid.rs index 341ab02cb..d64a6658d 100644 --- a/meilisearch-types/src/index_uid.rs +++ b/meilisearch-types/src/index_uid.rs @@ -1,3 +1,4 @@ +use std::borrow::Borrow; use std::error::Error; use std::fmt; use std::str::FromStr; @@ -8,7 +9,7 @@ use crate::error::{Code, ErrorCode}; /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long -#[derive(Debug, Clone, PartialEq, Eq, Deserr)] +#[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)] #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] pub struct IndexUid(String); @@ -70,6 +71,12 @@ impl From for String { } } +impl Borrow for IndexUid { + fn borrow(&self) -> &String { + &self.0 + } +} + #[derive(Debug)] pub struct IndexUidFormatError { pub invalid_uid: String, diff --git a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs 
index 36c45aac3..8d746779e 100644 --- a/meilisearch-types/src/locales.rs +++ b/meilisearch-types/src/locales.rs @@ -39,12 +39,14 @@ macro_rules! make_locale { pub enum Locale { $($iso_639_1,)+ $($iso_639_3,)+ + Cmn, } impl From for Locale { fn from(other: milli::tokenizer::Language) -> Locale { match other { $(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+ + milli::tokenizer::Language::Cmn => Locale::Cmn, } } } @@ -54,6 +56,7 @@ macro_rules! make_locale { match other { $(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+ $(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+ + Locale::Cmn => milli::tokenizer::Language::Cmn, } } } @@ -65,6 +68,7 @@ macro_rules! make_locale { let locale = match s { $($iso_639_1_str => Locale::$iso_639_1,)+ $($iso_639_3_str => Locale::$iso_639_3,)+ + "cmn" => Locale::Cmn, _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), }; @@ -79,8 +83,9 @@ macro_rules! make_locale { impl std::fmt::Display for LocaleFormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+].join(", "); - write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) + let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"]; + valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right))); + write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", ")) } } @@ -99,7 +104,6 @@ make_locale!( (Bg, "bg") => (Bul, "bul"), (Ca, "ca") => (Cat, "cat"), (Cs, "cs") => (Ces, "ces"), - (Zh, "zh") => (Cmn, "cmn"), (Da, "da") => (Dan, "dan"), (De, "de") => (Deu, "deu"), (El, "el") => (Ell, "ell"), @@ -157,5 +161,6 @@ make_locale!( (Uz, "uz") => (Uzb, "uzb"), (Vi, "vi") => (Vie, "vie"), (Yi, "yi") => (Yid, "yid"), + (Zh, "zh") => (Zho, "zho"), (Zu, "zu") => (Zul, "zul"), ); diff --git a/meilisearch/Cargo.toml 
b/meilisearch/Cargo.toml index e614ecc6a..2a16e1017 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"] khmer = ["meilisearch-types/khmer"] vietnamese = ["meilisearch-types/vietnamese"] swedish-recomposition = ["meilisearch-types/swedish-recomposition"] +german = ["meilisearch-types/german"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 07350d506..f8d6a0fdc 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -646,8 +646,6 @@ pub struct SearchAggregator { max_vector_size: usize, // Whether the semantic ratio passed to a hybrid search equals the default ratio. semantic_ratio: bool, - // Whether a non-default embedder was specified - embedder: bool, hybrid: bool, retrieve_vectors: bool, @@ -795,7 +793,6 @@ impl SearchAggregator { if let Some(hybrid) = hybrid { ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); - ret.embedder = hybrid.embedder.is_some(); ret.hybrid = true; } @@ -863,7 +860,6 @@ impl SearchAggregator { show_ranking_score, show_ranking_score_details, semantic_ratio, - embedder, hybrid, total_degraded, total_used_negative_operator, @@ -923,7 +919,6 @@ impl SearchAggregator { self.retrieve_vectors |= retrieve_vectors; self.semantic_ratio |= semantic_ratio; self.hybrid |= hybrid; - self.embedder |= embedder; // pagination self.max_limit = self.max_limit.max(max_limit); @@ -999,7 +994,6 @@ impl SearchAggregator { show_ranking_score, show_ranking_score_details, semantic_ratio, - embedder, hybrid, total_degraded, total_used_negative_operator, @@ -1051,7 +1045,6 @@ impl SearchAggregator { "hybrid": { "enabled": hybrid, "semantic_ratio": semantic_ratio, - "embedder": embedder, }, "pagination": { "max_limit": 
max_limit, @@ -1782,7 +1775,6 @@ pub struct SimilarAggregator { used_syntax: HashMap, // Whether a non-default embedder was specified - embedder: bool, retrieve_vectors: bool, // pagination @@ -1803,7 +1795,7 @@ impl SimilarAggregator { pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { let SimilarQuery { id: _, - embedder, + embedder: _, offset, limit, attributes_to_retrieve: _, @@ -1851,7 +1843,6 @@ impl SimilarAggregator { ret.show_ranking_score_details = *show_ranking_score_details; ret.ranking_score_threshold = ranking_score_threshold.is_some(); - ret.embedder = embedder.is_some(); ret.retrieve_vectors = *retrieve_vectors; ret @@ -1883,7 +1874,6 @@ impl SimilarAggregator { max_attributes_to_retrieve, show_ranking_score, show_ranking_score_details, - embedder, ranking_score_threshold, retrieve_vectors, } = other; @@ -1914,7 +1904,6 @@ impl SimilarAggregator { *used_syntax = used_syntax.saturating_add(value); } - self.embedder |= embedder; self.retrieve_vectors |= retrieve_vectors; // pagination @@ -1948,7 +1937,6 @@ impl SimilarAggregator { max_attributes_to_retrieve, show_ranking_score, show_ranking_score_details, - embedder, ranking_score_threshold, retrieve_vectors, } = self; @@ -1980,9 +1968,6 @@ impl SimilarAggregator { "vector": { "retrieve_vectors": retrieve_vectors, }, - "hybrid": { - "embedder": embedder, - }, "pagination": { "max_limit": max_limit, "max_offset": max_offset, diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 41473245e..5c4ce171f 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType}; use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; use meilisearch_types::error::{Code, ErrorCode, ResponseError}; use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; +use meilisearch_types::milli::OrderBy; use serde_json::Value; use tokio::task::JoinError; @@ -27,10 +28,20 @@ pub enum MeilisearchHttpError { 
EmptyFilter, #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] InvalidExpression(&'static [&'static str], Value), - #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")] + #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")] FederationOptionsInNonFederatedRequest(usize), - #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")] + #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")] PaginationInFederatedQuery(usize, &'static str), + #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")] + FacetsInFederatedQuery(usize, String, Vec), + #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")] + InconsistentFacetOrder { + facet: String, + previous_facet_order: OrderBy, + previous_uid: String, + index_facet_order: OrderBy, + current_uid: String, + }, #[error("A {0} payload is missing.")] MissingPayload(PayloadType), #[error("Too many search requests running at the same time: {0}. 
Retry after 10s.")] @@ -61,7 +72,7 @@ pub enum MeilisearchHttpError { DocumentFormat(#[from] DocumentFormatError), #[error(transparent)] Join(#[from] JoinError), - #[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")] + #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")] MissingSearchHybrid, } @@ -96,6 +107,10 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { Code::InvalidMultiSearchQueryPagination } + MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets, + MeilisearchHttpError::InconsistentFacetOrder { .. } => { + Code::InvalidMultiSearchFacetOrder + } } } } diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 362bc9937..6a8eee521 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -128,8 +128,10 @@ impl std::ops::Deref for SemanticRatioGet { } } -impl From for SearchQuery { - fn from(other: SearchQueryGet) -> Self { +impl TryFrom for SearchQuery { + type Error = ResponseError; + + fn try_from(other: SearchQueryGet) -> Result { let filter = match other.filter { Some(f) => match serde_json::from_str(&f) { Ok(v) => Some(v), @@ -140,19 +142,28 @@ impl From for SearchQuery { let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) { (None, None) => None, - (None, Some(semantic_ratio)) => { - Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None }) + (None, Some(_)) => { + return Err(ResponseError::from_msg( + "`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(), + meilisearch_types::error::Code::InvalidHybridQuery, + )); + } + (Some(embedder), None) => { + Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder }) } - (Some(embedder), None) => Some(HybridQuery { - semantic_ratio: DEFAULT_SEMANTIC_RATIO(), - embedder: Some(embedder), - }), 
(Some(embedder), Some(semantic_ratio)) => { - Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) }) + Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder }) } }; - Self { + if other.vector.is_some() && hybrid.is_none() { + return Err(ResponseError::from_msg( + "`hybridEmbedder` is mandatory when `vector` is present".into(), + meilisearch_types::error::Code::MissingSearchHybrid, + )); + } + + Ok(Self { q: other.q, vector: other.vector.map(CS::into_inner), offset: other.offset.0, @@ -179,7 +190,7 @@ impl From for SearchQuery { hybrid, ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), locales: other.locales.map(|o| o.into_iter().collect()), - } + }) } } @@ -219,7 +230,7 @@ pub async fn search_with_url_query( debug!(parameters = ?params, "Search get"); let index_uid = IndexUid::try_from(index_uid.into_inner())?; - let mut query: SearchQuery = params.into_inner().into(); + let mut query: SearchQuery = params.into_inner().try_into()?; // Tenant token search_rules. 
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { @@ -312,44 +323,36 @@ pub fn search_kind( features.check_vector("Passing `hybrid` as a parameter")?; } - // regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing - if query.vector.is_none() { - match &query.q { - Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly), - None => return Ok(SearchKind::KeywordOnly), - _ => {} + // handle with care, the order of cases matters, the semantics is subtle + match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) { + // empty query, no vector => placeholder search + (Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly), + // no query, no vector => placeholder search + (None, _, None) => Ok(SearchKind::KeywordOnly), + // hybrid.semantic_ratio == 1.0 => vector + (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => { + SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len())) } - } - - match &query.hybrid { - Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => { - Ok(SearchKind::semantic( - index_scheduler, - index, - embedder.as_deref(), - query.vector.as_ref().map(Vec::len), - )?) 
- } - Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => { + // hybrid.semantic_ratio == 0.0 => keyword + (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => { Ok(SearchKind::KeywordOnly) } - Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid( + // no query, hybrid, vector => semantic + (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => { + SearchKind::semantic(index_scheduler, index, embedder, Some(v.len())) + } + // query, no hybrid, no vector => keyword + (Some(_), None, None) => Ok(SearchKind::KeywordOnly), + // query, hybrid, maybe vector => hybrid + (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid( index_scheduler, index, - embedder.as_deref(), + embedder, **semantic_ratio, - query.vector.as_ref().map(Vec::len), - )?), - None => match (query.q.as_deref(), query.vector.as_deref()) { - (_query, None) => Ok(SearchKind::KeywordOnly), - (None, Some(_vector)) => Ok(SearchKind::semantic( - index_scheduler, - index, - None, - query.vector.as_ref().map(Vec::len), - )?), - (Some(_), Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()), - }, + v.map(|v| v.len()), + ), + + (_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()), } } diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index ceea17668..aaf8673d0 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -643,12 +643,19 @@ fn embedder_analytics( .max() }); + let binary_quantization_used = setting.as_ref().map(|map| { + map.values() + .filter_map(|config| config.clone().set()) + .any(|config| config.binary_quantized.set().is_some()) + }); + json!( { "total": setting.as_ref().map(|s| s.len()), "sources": sources, "document_template_used": document_template_used, - "document_template_max_bytes": document_template_max_bytes + 
"document_template_max_bytes": document_template_max_bytes, + "binary_quantization_used": binary_quantization_used, } ) } diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index 5027a473e..f94a02987 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -102,8 +102,8 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; - let (embedder_name, embedder) = - SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; + let (embedder_name, embedder, quantized) = + SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; tokio::task::spawn_blocking(move || { perform_similar( @@ -111,6 +111,7 @@ async fn similar( query, embedder_name, embedder, + quantized, retrieve_vectors, index_scheduler.features(), ) @@ -139,8 +140,8 @@ pub struct SimilarQueryGet { show_ranking_score_details: Param, #[deserr(default, error = DeserrQueryParamError, default)] pub ranking_score_threshold: Option, - #[deserr(default, error = DeserrQueryParamError)] - pub embedder: Option, + #[deserr(error = DeserrQueryParamError)] + pub embedder: String, } #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 58005ec53..5279c26bb 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -9,20 +9,24 @@ use std::vec::{IntoIter, Vec}; use actix_http::StatusCode; use index_scheduler::{IndexScheduler, RoFeatures}; +use indexmap::IndexMap; use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::error::deserr_codes::{ - InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, + InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, + InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit, + InvalidSearchOffset, }; use meilisearch_types::error::ResponseError; +use 
meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; -use meilisearch_types::milli::{self, DocumentId, TimeBudget}; +use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget}; use roaring::RoaringBitmap; use serde::Serialize; use super::ranking_rules::{self, RankingRules}; use super::{ - prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, - SearchQuery, SearchQueryWithIndex, + compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats, + HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, }; use crate::error::MeilisearchHttpError; use crate::routes::indexes::search::search_kind; @@ -73,6 +77,17 @@ pub struct Federation { pub limit: usize, #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError)] pub offset: usize, + #[deserr(default, error = DeserrJsonError)] + pub facets_by_index: BTreeMap>>, + #[deserr(default, error = DeserrJsonError)] + pub merge_facets: Option, +} + +#[derive(Copy, Clone, Debug, deserr::Deserr, Default)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub struct MergeFacets { + #[deserr(default, error = DeserrJsonError)] + pub max_values_per_facet: Option, } #[derive(Debug, deserr::Deserr)] @@ -82,7 +97,7 @@ pub struct FederatedSearch { #[deserr(default)] pub federation: Option, } -#[derive(Serialize, Clone, PartialEq)] +#[derive(Serialize, Clone)] #[serde(rename_all = "camelCase")] pub struct FederatedSearchResult { pub hits: Vec, @@ -93,6 +108,13 @@ pub struct FederatedSearchResult { #[serde(skip_serializing_if = "Option::is_none")] pub semantic_hit_count: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_distribution: Option>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub facet_stats: Option>, + #[serde(skip_serializing_if = "FederatedFacets::is_empty")] + pub 
facets_by_index: FederatedFacets, + // These fields are only used for analytics purposes #[serde(skip)] pub degraded: bool, @@ -109,6 +131,9 @@ impl fmt::Debug for FederatedSearchResult { semantic_hit_count, degraded, used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, } = self; let mut debug = f.debug_struct("SearchResult"); @@ -122,9 +147,18 @@ impl fmt::Debug for FederatedSearchResult { if *degraded { debug.field("degraded", degraded); } + if let Some(facet_distribution) = facet_distribution { + debug.field("facet_distribution", &facet_distribution); + } + if let Some(facet_stats) = facet_stats { + debug.field("facet_stats", &facet_stats); + } if let Some(semantic_hit_count) = semantic_hit_count { debug.field("semantic_hit_count", &semantic_hit_count); } + if !facets_by_index.is_empty() { + debug.field("facets_by_index", &facets_by_index); + } debug.finish() } @@ -313,16 +347,104 @@ struct SearchHitByIndex { } struct SearchResultByIndex { + index: String, hits: Vec, - candidates: RoaringBitmap, + estimated_total_hits: usize, degraded: bool, used_negative_operator: bool, + facets: Option, +} + +#[derive(Debug, Clone, Default, Serialize)] +pub struct FederatedFacets(pub BTreeMap); + +impl FederatedFacets { + pub fn insert(&mut self, index: String, facets: Option) { + if let Some(facets) = facets { + self.0.insert(index, facets); + } + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn merge( + self, + MergeFacets { max_values_per_facet }: MergeFacets, + facet_order: BTreeMap, + ) -> Option { + if self.is_empty() { + return None; + } + + let mut distribution: BTreeMap = Default::default(); + let mut stats: BTreeMap = Default::default(); + + for facets_by_index in self.0.into_values() { + for (facet, index_distribution) in facets_by_index.distribution { + match distribution.entry(facet) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(index_distribution); + } + 
std::collections::btree_map::Entry::Occupied(mut entry) => { + let distribution = entry.get_mut(); + + for (value, index_count) in index_distribution { + distribution + .entry(value) + .and_modify(|count| *count += index_count) + .or_insert(index_count); + } + } + } + } + + for (facet, index_stats) in facets_by_index.stats { + match stats.entry(facet) { + std::collections::btree_map::Entry::Vacant(entry) => { + entry.insert(index_stats); + } + std::collections::btree_map::Entry::Occupied(mut entry) => { + let stats = entry.get_mut(); + + stats.min = f64::min(stats.min, index_stats.min); + stats.max = f64::max(stats.max, index_stats.max); + } + } + } + } + + // fixup order + for (facet, values) in &mut distribution { + let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); + + match order_by { + OrderBy::Lexicographic => { + values.sort_unstable_by(|left, _, right, _| left.cmp(right)) + } + OrderBy::Count => { + values.sort_unstable_by(|_, left, _, right| { + left.cmp(right) + // biggest first + .reverse() + }) + } + } + + if let Some(max_values_per_facet) = max_values_per_facet { + values.truncate(max_values_per_facet) + }; + } + + Some(ComputedFacets { distribution, stats }) + } } pub fn perform_federated_search( index_scheduler: &IndexScheduler, queries: Vec, - federation: Federation, + mut federation: Federation, features: RoFeatures, ) -> Result { let before_search = std::time::Instant::now(); @@ -342,6 +464,16 @@ pub fn perform_federated_search( .into()); } + if let Some(facets) = federated_query.has_facets() { + let facets = facets.to_owned(); + return Err(MeilisearchHttpError::FacetsInFederatedQuery( + query_index, + federated_query.index_uid.into_inner(), + facets, + ) + .into()); + } + let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { @@ -353,13 +485,24 @@ pub fn perform_federated_search( // 2. 
perform queries, merge and make hits index by index let required_hit_count = federation.limit + federation.offset; + // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic // Then in step (3), we'll update its value if there is any semantic search let mut semantic_hit_count = None; let mut results_by_index = Vec::with_capacity(queries_by_index.len()); let mut previous_query_data: Option<(RankingRules, usize, String)> = None; + // remember the order and name of first index for each facet when merging with index settings + // to detect if the order is inconsistent for a facet. + let mut facet_order: Option> = match federation.merge_facets + { + Some(MergeFacets { .. }) => Some(Default::default()), + _ => None, + }; + for (index_uid, queries) in queries_by_index { + let first_query_index = queries.first().map(|query| query.query_index); + let index = match index_scheduler.index(&index_uid) { Ok(index) => index, Err(err) => { @@ -367,9 +510,8 @@ pub fn perform_federated_search( // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but // here the resource not found is not part of the URL. 
err.code = StatusCode::BAD_REQUEST; - if let Some(query) = queries.first() { - err.message = - format!("Inside `.queries[{}]`: {}", query.query_index, err.message); + if let Some(query_index) = first_query_index { + err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message); } return Err(err); } @@ -394,6 +536,23 @@ pub fn perform_federated_search( let mut used_negative_operator = false; let mut candidates = RoaringBitmap::new(); + let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten(); + + // TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries + if let Err(mut error) = + check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", + if let Some(query_index) = first_query_index { + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + return Err(error); + } + // 2.1. 
Compute all candidates for each query in the index let mut results_by_query = Vec::with_capacity(queries.len()); @@ -562,34 +721,116 @@ pub fn perform_federated_search( .collect(); let merged_result = merged_result?; + + let estimated_total_hits = candidates.len() as usize; + + let facets = facets_by_index + .map(|facets_by_index| { + compute_facet_distribution_stats( + &facets_by_index, + &index, + &rtxn, + candidates, + super::Route::MultiSearch, + ) + }) + .transpose() + .map_err(|mut error| { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", + error.message, + if let Some(query_index) = first_query_index { + format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") + } else { + Default::default() + } + ); + error + })?; + results_by_index.push(SearchResultByIndex { + index: index_uid, hits: merged_result, - candidates, + estimated_total_hits, degraded, used_negative_operator, + facets, }); } + // bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index. + for (index_uid, facets) in federation.facets_by_index { + let index = match index_scheduler.index(&index_uid) { + Ok(index) => index, + Err(err) => { + let mut err = ResponseError::from(err); + // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but + // here the resource not found is not part of the URL. 
+ err.code = StatusCode::BAD_REQUEST; + err.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", + err.message + ); + return Err(err); + } + }; + + // Important: this is the only transaction we'll use for this index during this federated search + let rtxn = index.read_txn()?; + + if let Err(mut error) = + check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) + { + error.message = format!( + "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries", + ); + return Err(error); + } + + if let Some(facets) = facets { + if let Err(mut error) = compute_facet_distribution_stats( + &facets, + &index, + &rtxn, + Default::default(), + super::Route::MultiSearch, + ) { + error.message = + format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message); + return Err(error); + } + } + } + // 3. 
merge hits and metadata across indexes // 3.1 merge metadata - let (estimated_total_hits, degraded, used_negative_operator) = { + let (estimated_total_hits, degraded, used_negative_operator, facets) = { let mut estimated_total_hits = 0; let mut degraded = false; let mut used_negative_operator = false; + let mut facets: FederatedFacets = FederatedFacets::default(); + for SearchResultByIndex { + index, hits: _, - candidates, + estimated_total_hits: estimated_total_hits_by_index, + facets: facets_by_index, degraded: degraded_by_index, used_negative_operator: used_negative_operator_by_index, - } in &results_by_index + } in &mut results_by_index { - estimated_total_hits += candidates.len() as usize; + estimated_total_hits += *estimated_total_hits_by_index; degraded |= *degraded_by_index; used_negative_operator |= *used_negative_operator_by_index; + + let facets_by_index = std::mem::take(facets_by_index); + let index = std::mem::take(index); + + facets.insert(index, facets_by_index); } - (estimated_total_hits, degraded, used_negative_operator) + (estimated_total_hits, degraded, used_negative_operator, facets) }; // 3.2 merge hits @@ -606,6 +847,20 @@ pub fn perform_federated_search( .map(|hit| hit.hit) .collect(); + let (facet_distribution, facet_stats, facets_by_index) = + match federation.merge_facets.zip(facet_order) { + Some((merge_facets, facet_order)) => { + let facets = facets.merge(merge_facets, facet_order); + + let (facet_distribution, facet_stats) = facets + .map(|ComputedFacets { distribution, stats }| (distribution, stats)) + .unzip(); + + (facet_distribution, facet_stats, FederatedFacets::default()) + } + None => (None, None, facets), + }; + let search_result = FederatedSearchResult { hits: merged_hits, processing_time_ms: before_search.elapsed().as_millis(), @@ -617,7 +872,39 @@ pub fn perform_federated_search( semantic_hit_count, degraded, used_negative_operator, + facet_distribution, + facet_stats, + facets_by_index, }; Ok(search_result) } + +fn 
check_facet_order( + facet_order: &mut Option>, + current_index: &str, + facets_by_index: &Option>, + index: &milli::Index, + rtxn: &milli::heed::RoTxn<'_>, +) -> Result<(), ResponseError> { + if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) { + let index_facet_order = index.sort_facet_values_by(rtxn)?; + for facet in facets_by_index { + let index_facet_order = index_facet_order.get(facet); + let (previous_index, previous_facet_order) = facet_order + .entry(facet.to_owned()) + .or_insert_with(|| (current_index.to_owned(), index_facet_order)); + if previous_facet_order != &index_facet_order { + return Err(MeilisearchHttpError::InconsistentFacetOrder { + facet: facet.clone(), + previous_facet_order: *previous_facet_order, + previous_uid: previous_index.clone(), + current_uid: current_index.to_owned(), + index_facet_order, + } + .into()); + } + } + }; + Ok(()) +} diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 915505be0..66b6e56de 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -267,58 +267,54 @@ impl fmt::Debug for SearchQuery { pub struct HybridQuery { #[deserr(default, error = DeserrJsonError, default)] pub semantic_ratio: SemanticRatio, - #[deserr(default, error = DeserrJsonError, default)] - pub embedder: Option, + #[deserr(error = DeserrJsonError)] + pub embedder: String, } #[derive(Clone)] pub enum SearchKind { KeywordOnly, - SemanticOnly { embedder_name: String, embedder: Arc }, - Hybrid { embedder_name: String, embedder: Arc, semantic_ratio: f32 }, + SemanticOnly { embedder_name: String, embedder: Arc, quantized: bool }, + Hybrid { embedder_name: String, embedder: Arc, quantized: bool, semantic_ratio: f32 }, } impl SearchKind { pub(crate) fn semantic( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, vector_len: Option, ) -> Result { - let (embedder_name, embedder) = + let (embedder_name, 
embedder, quantized) = Self::embedder(index_scheduler, index, embedder_name, vector_len)?; - Ok(Self::SemanticOnly { embedder_name, embedder }) + Ok(Self::SemanticOnly { embedder_name, embedder, quantized }) } pub(crate) fn hybrid( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, semantic_ratio: f32, vector_len: Option, ) -> Result { - let (embedder_name, embedder) = + let (embedder_name, embedder, quantized) = Self::embedder(index_scheduler, index, embedder_name, vector_len)?; - Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio }) + Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio }) } pub(crate) fn embedder( index_scheduler: &index_scheduler::IndexScheduler, index: &Index, - embedder_name: Option<&str>, + embedder_name: &str, vector_len: Option, - ) -> Result<(String, Arc), ResponseError> { + ) -> Result<(String, Arc, bool), ResponseError> { let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let embedders = index_scheduler.embedders(embedder_configs)?; - let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name()); - - let embedder = embedders.get(embedder_name); - - let embedder = embedder + let (embedder, _, quantized) = embedders + .get(embedder_name) .ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned())) - .map_err(milli::Error::from)? 
- .0; + .map_err(milli::Error::from)?; if let Some(vector_len) = vector_len { if vector_len != embedder.dimensions() { @@ -332,7 +328,7 @@ impl SearchKind { } } - Ok((embedder_name.to_owned(), embedder)) + Ok((embedder_name.to_owned(), embedder, quantized)) } } @@ -441,9 +437,6 @@ pub struct SearchQueryWithIndex { } impl SearchQueryWithIndex { - pub fn has_federation_options(&self) -> bool { - self.federation_options.is_some() - } pub fn has_pagination(&self) -> Option<&'static str> { if self.offset.is_some() { Some("offset") @@ -458,6 +451,10 @@ impl SearchQueryWithIndex { } } + pub fn has_facets(&self) -> Option<&[String]> { + self.facets.as_deref().filter(|v| !v.is_empty()) + } + pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option) { let SearchQueryWithIndex { index_uid, @@ -537,8 +534,8 @@ pub struct SimilarQuery { pub limit: usize, #[deserr(default, error = DeserrJsonError)] pub filter: Option, - #[deserr(default, error = DeserrJsonError, default)] - pub embedder: Option, + #[deserr(error = DeserrJsonError)] + pub embedder: String, #[deserr(default, error = DeserrJsonError)] pub attributes_to_retrieve: Option>, #[deserr(default, error = DeserrJsonError)] @@ -792,7 +789,7 @@ fn prepare_search<'t>( search.query(q); } } - SearchKind::SemanticOnly { embedder_name, embedder } => { + SearchKind::SemanticOnly { embedder_name, embedder, quantized } => { let vector = match query.vector.clone() { Some(vector) => vector, None => { @@ -806,14 +803,19 @@ fn prepare_search<'t>( } }; - search.semantic(embedder_name.clone(), embedder.clone(), Some(vector)); + search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector)); } - SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => { + SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => { if let Some(q) = &query.q { search.query(q); } // will be embedded in hybrid search if necessary - search.semantic(embedder_name.clone(), embedder.clone(), 
query.vector.clone()); + search.semantic( + embedder_name.clone(), + embedder.clone(), + *quantized, + query.vector.clone(), + ); } } @@ -987,39 +989,13 @@ pub fn perform_search( HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits } }; - let (facet_distribution, facet_stats) = match facets { - Some(ref fields) => { - let mut facet_distribution = index.facets_distribution(&rtxn); - - let max_values_by_facet = index - .max_values_per_facet(&rtxn) - .map_err(milli::Error::from)? - .map(|x| x as usize) - .unwrap_or(DEFAULT_VALUES_PER_FACET); - facet_distribution.max_values_per_facet(max_values_by_facet); - - let sort_facet_values_by = - index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; - - if fields.iter().all(|f| f != "*") { - let fields: Vec<_> = - fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); - facet_distribution.facets(fields); - } - - let distribution = facet_distribution - .candidates(candidates) - .default_order_by(sort_facet_values_by.get("*")) - .execute()?; - let stats = facet_distribution.compute_stats()?; - (Some(distribution), Some(stats)) - } - None => (None, None), - }; - - let facet_stats = facet_stats.map(|stats| { - stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect() - }); + let (facet_distribution, facet_stats) = facets + .map(move |facets| { + compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search) + }) + .transpose()? 
+ .map(|ComputedFacets { distribution, stats }| (distribution, stats)) + .unzip(); let result = SearchResult { hits: documents, @@ -1035,6 +1011,61 @@ pub fn perform_search( Ok(result) } +#[derive(Debug, Clone, Default, Serialize)] +pub struct ComputedFacets { + pub distribution: BTreeMap>, + pub stats: BTreeMap, +} + +enum Route { + Search, + MultiSearch, +} + +fn compute_facet_distribution_stats>( + facets: &[S], + index: &Index, + rtxn: &RoTxn, + candidates: roaring::RoaringBitmap, + route: Route, +) -> Result { + let mut facet_distribution = index.facets_distribution(rtxn); + + let max_values_by_facet = index + .max_values_per_facet(rtxn) + .map_err(milli::Error::from)? + .map(|x| x as usize) + .unwrap_or(DEFAULT_VALUES_PER_FACET); + + facet_distribution.max_values_per_facet(max_values_by_facet); + + let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?; + + // add specific facet if there is no placeholder + if facets.iter().all(|f| f.as_ref() != "*") { + let fields: Vec<_> = + facets.iter().map(|n| (n, sort_facet_values_by.get(n.as_ref()))).collect(); + facet_distribution.facets(fields); + } + + let distribution = facet_distribution + .candidates(candidates) + .default_order_by(sort_facet_values_by.get("*")) + .execute() + .map_err(|error| match (error, route) { + ( + error @ milli::Error::UserError(milli::UserError::InvalidFacetsDistribution { + .. 
+ }), + Route::MultiSearch, + ) => ResponseError::from_msg(error.to_string(), Code::InvalidMultiSearchFacets), + (error, _) => error.into(), + })?; + let stats = facet_distribution.compute_stats()?; + let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect(); + Ok(ComputedFacets { distribution, stats }) +} + pub fn search_from_kind( search_kind: SearchKind, search: milli::Search<'_>, @@ -1413,6 +1444,7 @@ pub fn perform_similar( query: SimilarQuery, embedder_name: String, embedder: Arc, + quantized: bool, retrieve_vectors: RetrieveVectors, features: RoFeatures, ) -> Result { @@ -1441,8 +1473,16 @@ pub fn perform_similar( )); }; - let mut similar = - milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder); + let mut similar = milli::Similar::new( + internal_id, + offset, + limit, + index, + &rtxn, + embedder_name, + embedder, + quantized, + ); if let Some(ref filter) = query.filter { if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? 
{ diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 280073f51..4c644ae98 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -136,7 +136,7 @@ async fn get_all_documents_bad_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -525,7 +525,7 @@ async fn delete_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -723,7 +723,7 @@ async fn fetch_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": 
"Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index fee7eef7d..6840f8fba 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -646,7 +646,7 @@ async fn filter_invalid_syntax_object() { .search(json!({"filter": "title & Glass"}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -669,7 +669,7 @@ async fn filter_invalid_syntax_array() { .search(json!({"filter": ["title & Glass"]}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, 
`<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1163,7 +1163,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1176,7 +1176,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. 
See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1192,7 +1192,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" @@ -1204,7 +1204,7 @@ async fn search_with_contains_without_enabling_the_feature() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", + "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. 
See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "code": "feature_not_enabled", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#feature_not_enabled" diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index ee4181694..e301c0b05 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -128,7 +128,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -137,7 +137,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -146,7 +146,7 @@ async fn simple_search() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -161,7 +161,7 @@ async fn limit_offset() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}), ) 
.await; snapshot!(code, @"200 OK"); @@ -174,7 +174,7 @@ async fn limit_offset() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}), ) .await; snapshot!(code, @"200 OK"); @@ -188,8 +188,11 @@ async fn simple_search_hf() { let server = Server::new().await; let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await; - let (response, code) = - index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await; + let (response, code) = index + .search_post( + json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}), + ) + .await; snapshot!(code, @"200 OK"); snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###); snapshot!(response["semanticHitCount"], @"0"); @@ -197,7 +200,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( // disable ranking score as the vectors between architectures are not equal - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -206,7 +209,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -215,7 +218,7 @@ async fn simple_search_hf() { let 
(response, code) = index .search_post( - json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -224,7 +227,7 @@ async fn simple_search_hf() { let (response, code) = index .search_post( - json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}), ) .await; snapshot!(code, @"200 OK"); @@ -237,7 +240,7 @@ async fn distribution_shift() { let server = Server::new().await; let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; - let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true}); + let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true}); let (response, code) = index.search_post(search.clone()).await; snapshot!(code, @"200 OK"); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); @@ -271,7 +274,7 @@ async fn highlighter() { let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 0.2}, + "hybrid": {"embedder": 
"default", "semanticRatio": 0.2}, "retrieveVectors": true, "attributesToHighlight": [ "desc", @@ -287,7 +290,7 @@ async fn highlighter() { let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 0.8}, + "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "retrieveVectors": true, "showRankingScore": true, "attributesToHighlight": [ @@ -304,7 +307,7 @@ async fn highlighter() { // no highlighting on full semantic let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], - "hybrid": {"semanticRatio": 1.0}, + "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true, "showRankingScore": true, "attributesToHighlight": [ @@ -326,7 +329,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}), ) .await; snapshot!(code, @"400 Bad Request"); @@ -341,7 +344,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_post( - json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}), + json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}), ) .await; snapshot!(code, @"400 Bad Request"); @@ -357,7 +360,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_get( &yaup::to_string( - &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}), + &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}), ) .unwrap(), ) @@ -375,7 +378,7 @@ async fn invalid_semantic_ratio() { let (response, code) = index .search_get( &yaup::to_string( - &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}), + &json!({"q": "Captain", "vector": [1.0, 1.0], 
"hybridEmbedder": "default", "hybridSemanticRatio": -0.2}), ) .unwrap(), ) @@ -398,7 +401,7 @@ async fn single_document() { let (response, code) = index .search_post( - json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), + json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -414,7 +417,7 @@ async fn query_combination() { // search without query and vector, but with hybrid => still placeholder let (response, code) = index - .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -423,7 +426,7 @@ async fn query_combination() { // same with a different semantic ratio let (response, code) = index - .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -432,7 +435,7 @@ async fn query_combination() { // wrong vector dimensions let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"400 Bad Request"); @@ -447,7 +450,7 @@ async fn query_combination() { // full vector let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0], "hybrid": 
{"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -456,7 +459,7 @@ async fn query_combination() { // full keyword, without a query let (response, code) = index - .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -465,7 +468,7 @@ async fn query_combination() { // query + vector, full keyword => keyword let (response, code) = index - .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) + .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .await; snapshot!(code, @"200 OK"); @@ -480,7 +483,7 @@ async fn query_combination() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.", + "message": "Invalid request: missing `hybrid` parameter when `vector` is present.", "code": "missing_search_hybrid", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#missing_search_hybrid" @@ -490,7 +493,7 @@ async fn query_combination() { // full vector, without a vector => error let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -507,7 +510,7 @@ async fn query_combination() { // hybrid without a vector => full keyword let (response, code) = index 
.search_post( - json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}), + json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}), ) .await; @@ -523,7 +526,7 @@ async fn retrieve_vectors() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); @@ -573,7 +576,7 @@ async fn retrieve_vectors() { let (response, code) = index .search_post( - json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), + json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}), ) .await; snapshot!(code, @"200 OK"); diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 4724f975d..c01d854e2 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -922,7 +922,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, 
`mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", + "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -935,7 +935,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bn, bg, ca, cs, zh, da, de, el, en, eo, et, fi, fr, gu, he, hi, hr, hu, hy, id, it, jv, ja, kn, ka, km, ko, la, lv, lt, ml, mr, mk, my, ne, nl, nb, or, pa, fa, pl, pt, ro, ru, si, sk, sl, sn, es, sr, sv, ta, te, tl, th, tk, tr, uk, ur, uz, vi, yi, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, 
khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zul", + "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bg, bn, ca, cs, da, de, el, en, eo, es, et, fa, fi, fr, gu, he, hi, hr, hu, hy, id, it, ja, jv, ka, km, kn, ko, la, lt, lv, mk, ml, mr, my, nb, ne, nl, or, pa, pl, pt, ro, ru, si, sk, sl, sn, sr, sv, ta, te, th, tk, tl, tr, uk, ur, uz, vi, yi, zh, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zho, zul", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -957,7 +957,7 @@ async fn invalid_localized_attributes_rules() { .await; snapshot!(response, @r###" { - "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, 
`ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", + "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`", "code": "invalid_settings_localized_attributes", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" @@ -1143,3 +1143,195 @@ async fn facet_search_with_localized_attributes() { } "###); } + +#[actix_rt::test] +async fn swedish_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = json!([ + {"id": "tra1-1", "product": "trä"}, + {"id": "tra2-1", "product": "traktor"}, + {"id": "tra1-2", "product": "träbjälke"}, + {"id": "tra2-2", "product": "trafiksignal"}, + ]); + index.add_documents(documents, None).await; + let (_response, _) = index + .update_settings(json!({ + "searchableAttributes": ["product"], + "localizedAttributes": [ + // force swedish + {"attributePatterns": ["product"], "locales": ["swe"]} + ] + })) + .await; + 
index.wait_task(1).await; + + // infer swedish + index + .search(json!({"q": "trä", "attributesToRetrieve": ["product"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "trä" + }, + { + "product": "träbjälke" + } + ], + "query": "trä", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + index + .search(json!({"q": "tra", "attributesToRetrieve": ["product"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "traktor" + }, + { + "product": "trafiksignal" + } + ], + "query": "tra", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + // force swedish + index + .search( + json!({"q": "trä", "locales": ["swe"], "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "trä" + }, + { + "product": "träbjälke" + } + ], + "query": "trä", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + index + .search( + json!({"q": "tra", "locales": ["swe"], "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "traktor" + }, + { + "product": "trafiksignal" + } + ], + "query": "tra", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} + +#[actix_rt::test] +async fn german_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = json!([ + {"id": 1, "product": "Interkulturalität"}, + {"id": 2, "product": "Wissensorganisation"}, + ]); + index.add_documents(documents, None).await; + let (_response, _) = index + .update_settings(json!({ + 
"searchableAttributes": ["product"], + "localizedAttributes": [ + // force swedish + {"attributePatterns": ["product"], "locales": ["deu"]} + ] + })) + .await; + index.wait_task(1).await; + + // infer swedish + index + .search( + json!({"q": "kulturalität", "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "Interkulturalität" + } + ], + "query": "kulturalität", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + index + .search( + json!({"q": "organisation", "attributesToRetrieve": ["product"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "product": "Wissensorganisation" + } + ], + "query": "organisation", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 974025652..d1091d944 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -1099,22 +1099,28 @@ async fn experimental_feature_vector_store() { index.add_documents(json!(documents), None).await; index.wait_task(0).await; - index - .search(json!({ + let (response, code) = index + .search_post(json!({ "vector": [1.0, 2.0, 3.0], + "hybrid": { + "embedder": "manual", + }, "showRankingScore": true - }), |response, code|{ - meili_snap::snapshot!(code, @"400 Bad Request"); - meili_snap::snapshot!(meili_snap::json_string!(response), @r###" - { - "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. 
See https://github.com/meilisearch/product/discussions/677", - "code": "feature_not_enabled", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#feature_not_enabled" - } - "###); - }) + })) .await; + + { + meili_snap::snapshot!(code, @"400 Bad Request"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "message": "Passing `vector` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + } + index .search(json!({ "retrieveVectors": true, @@ -1162,6 +1168,9 @@ async fn experimental_feature_vector_store() { let (response, code) = index .search_post(json!({ "vector": [1.0, 2.0, 3.0], + "hybrid": { + "embedder": "manual", + }, "showRankingScore": true, "retrieveVectors": true, })) diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index 08ad0b18c..b9593f05f 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -3799,7 +3799,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `limit` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `limit` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3815,7 +3815,7 @@ async fn 
federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `offset` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `offset` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3831,7 +3831,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `page` from query #1 or remove `federation: {}` from the request", + "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `page` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3847,7 +3847,7 @@ async fn federation_federated_contains_pagination() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using pagination options is not allowed in federated queries.\n Hint: remove `hitsPerPage` from query #1 or remove `federation: {}` from the request", + "message": "Inside 
`.queries[1]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `hitsPerPage` from query #1 or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search", "code": "invalid_multi_search_query_pagination", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_pagination" @@ -3855,6 +3855,214 @@ async fn federation_federated_contains_pagination() { "###); } +#[actix_rt::test] +async fn federation_federated_contains_facets() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // empty facets are actually OK + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "facets": []}, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "name": "Red apple gala", + "id": "red-apple-gala", + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.953042328042328 + } + }, + { + "name": "Exclusive sale: Red delicious apple", + "id": "red-delicious-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.9093915343915344 + } + }, + { + "name": "Exclusive sale: green apple", + "id": "green-apple-boosted", + "BOOST": true, + "_federation": { + "indexUid": "fruits", + "queriesPosition": 0, + "weightedRankingScore": 0.4393939393939394 + } + } + ], + "processingTimeMs": 
"[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); + + // fails + let (response, code) = server + .multi_search(json!({"federation": {}, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red", "facets": ["BOOSTED"]}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.queries[1]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #1 or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.fruits: [\"BOOSTED\"]` for facets in federated search", + "code": "invalid_multi_search_query_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_query_facets" + } + "###); +} + +#[actix_rt::test] +async fn federation_non_faceted_for_an_index() { + let server = Server::new().await; + + let index = server.index("fruits"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id", "name"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let index = server.index("fruits-no-name"); + + let (value, _) = index + .update_settings( + json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id"]}), + ) + .await; + + index.wait_task(value.uid()).await; + + let index = server.index("fruits-no-facets"); + + let (value, _) = index.update_settings(json!({"searchableAttributes": ["name"]})).await; + + index.wait_task(value.uid()).await; + + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + // fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id", "name"], + } + }, "queries": [ + 
{"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits-no-name", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`", + "code": "invalid_multi_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" + } + "###); + + // still fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries", + "code": "invalid_multi_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" + } + "###); + + // fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "fruits": ["BOOST", "id", "name"], + "fruits-no-name": ["BOOST", "id"], + "fruits-no-facets": ["BOOST", "id"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.fruits-no-facets`: Invalid facet distribution, this index does not have configured filterable attributes.\n - Note: index `fruits-no-facets` is not used in queries", + "code": "invalid_multi_search_facets", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" + } + "###); + + // also fails + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "zorglub": ["BOOST", "id", "name"], + "fruits": ["BOOST", "id", "name"], + } + }, "queries": [ + {"indexUid" : "fruits", "q": "apple red"}, + {"indexUid": "fruits", "q": "apple red"}, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside `.federation.facetsByIndex.zorglub`: Index `zorglub` not found.\n - Note: index `zorglub` is not used in queries", + "code": "index_not_found", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#index_not_found" + } + "###); +} + #[actix_rt::test] async fn federation_non_federated_contains_federation_option() { let server = Server::new().await; @@ -3875,7 +4083,7 @@ async fn 
federation_non_federated_contains_federation_option() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #1 or add `federation: {}` to the request.", + "message": "Inside `.queries[1]`: Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #1 or add `federation` to the request.", "code": "invalid_multi_search_federation_options", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_federation_options" @@ -4433,3 +4641,1328 @@ async fn federation_vector_two_indexes() { } "###); } + +#[actix_rt::test] +async fn federation_facets_different_indexes_same_facet() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman-2"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, 
None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // return titles ordered across indexes + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "batman-2": ["title"], + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman
the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "batman-2": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3, + "yellow": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + 
} + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title"], + "batman": ["title"], + "batman-2": ["title"] + }, + "mergeFacets": {} + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight 
returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "title": { + "Badman": 2, + "Batman": 2, + "Batman Returns": 2, + "Batman the dark knight returns: Part 1": 2, + "Batman the dark knight returns: Part 2": 2, + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "facetStats": {} + } + "###); + + // mix and match query: will be sorted across indexes + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": [], + "batman": ["title"], + "batman-2": ["title"] + } + }, "queries": [ + {"indexUid" : "batman", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman-2", "q": "badman returns", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies", "q": "captain", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "the bat", "sort": ["title:desc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + 
insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 2, + "weightedRankingScore": 0.9848484848484848 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 3, + "weightedRankingScore": 0.9528218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.7028218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 3, + "weightedRankingScore": 0.9528218694885362 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.7028218694885362 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.8317901234567902 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.8317901234567902 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.23106060606060605 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.23106060606060605 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 0, + "weightedRankingScore": 0.5 + } + }, + { + "title": "Badman", + "_federation": { + "indexUid": "batman-2", + "queriesPosition": 1, + "weightedRankingScore": 0.5 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 11, + "facetsByIndex": { + "batman": { + "distribution": { + "title": 
{ + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "batman-2": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": {}, + "stats": {} + } + } + } + "###); +} + +#[actix_rt::test] +async fn federation_facets_same_indexes() { + let server = Server::new().await; + + let index = server.index("doggos"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "filterableAttributes": ["father", "mother", "doggos.age"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("doggos-2"); + + let documents = NESTED_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "filterableAttributes": ["father", "mother", "doggos.age"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"] + } + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": 
"doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3, + "facetsByIndex": { + "doggos": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1, + "5": 1, + "6": 1 + }, + "father": { + "jean": 1, + "jean-baptiste": 1, + "romain": 1 + }, + "mother": { + "michelle": 2, + "sophie": 1 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"], + "doggos-2": ["father", "mother", "doggos.age"] + } + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 852, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "facetsByIndex": { + "doggos": { + "distribution": { + 
"doggos.age": { + "2": 1, + "4": 1, + "5": 1, + "6": 1 + }, + "father": { + "jean": 1, + "jean-baptiste": 1 + }, + "mother": { + "michelle": 1, + "sophie": 1 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + }, + "doggos-2": { + "distribution": { + "doggos.age": { + "2": 1, + "4": 1 + }, + "father": { + "jean": 1, + "romain": 1 + }, + "mother": { + "michelle": 2 + } + }, + "stats": { + "doggos.age": { + "min": 2.0, + "max": 4.0 + } + } + } + } + } + "###); + + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "doggos": ["father", "mother", "doggos.age"], + "doggos-2": ["father", "mother", "doggos.age"] + }, + "mergeFacets": {}, + }, "queries": [ + {"indexUid" : "doggos", "q": "je", "attributesToRetrieve": ["id"] }, + {"indexUid" : "doggos-2", "q": "michel", "attributesToRetrieve": ["id"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "id": 852, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 951, + "_federation": { + "indexUid": "doggos", + "queriesPosition": 0, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 852, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + }, + { + "id": 750, + "_federation": { + "indexUid": "doggos-2", + "queriesPosition": 1, + "weightedRankingScore": 0.9621212121212122 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4, + "facetDistribution": { + "doggos.age": { + "2": 2, + "4": 2, + "5": 1, + "6": 1 + }, + "father": { + "jean": 2, + "jean-baptiste": 1, + "romain": 1 + }, + "mother": { + "michelle": 3, + "sophie": 1 + } + }, + "facetStats": { + "doggos.age": { + "min": 2.0, + "max": 6.0 + } + } + } + "###); +} + +#[actix_rt::test] +async fn 
federation_inconsistent_merge_order() { + let server = Server::new().await; + + let index = server.index("movies"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("movies-2"); + + let documents = DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title", "color"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ], + "faceting": { + "sortFacetValuesBy": { "color": "count" } + } + })) + .await; + index.wait_task(value.uid()).await; + + let index = server.index("batman"); + + let documents = SCORE_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + + let (value, _) = index + .update_settings(json!({ + "sortableAttributes": ["title"], + "filterableAttributes": ["title"], + "rankingRules": [ + "sort", + "words", + "typo", + "proximity", + "attribute", + "exactness" + ] + })) + .await; + index.wait_task(value.uid()).await; + + // without merging, it works + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", 
"q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + 
"_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetsByIndex": { + "batman": { + "distribution": { + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1, + "Batman the dark knight returns: Part 1": 1, + "Batman the dark knight returns: Part 2": 1 + } + }, + "stats": {} + }, + "movies": { + "distribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3, + "yellow": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + }, + "movies-2": { + "distribution": { + "color": { + "red": 3, + "blue": 3, + "yellow": 2, + "green": 2 + }, + "title": { + "Captain Marvel": 1, + "Escape Room": 1, + "Gläss": 1, + "How to Train Your Dragon: The Hidden World": 1, + "Shazam!": 1 + } + }, + "stats": {} + } + } + } + "###); + + // fails with merging + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title", "color"], + }, + "mergeFacets": {} + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"400 Bad Request"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "message": "Inside 
`.federation.facetsByIndex.movies-2`: Inconsistent order for values in facet `color`: index `movies` orders alphabetically, but index `movies-2` orders by count.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.\n - Note: index `movies-2` used in `.queries[2]`", + "code": "invalid_multi_search_facet_order", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facet_order" + } + "###); + + // can limit the number of values + let (response, code) = server + .multi_search(json!({"federation": { + "facetsByIndex": { + "movies": ["title", "color"], + "batman": ["title"], + "movies-2": ["title"], + }, + "mergeFacets": { + "maxValuesPerFacet": 3, + } + }, "queries": [ + {"indexUid" : "movies", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "batman", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + {"indexUid" : "movies-2", "q": "", "sort": ["title:asc"], "attributesToRetrieve": ["title"] }, + ]})) + .await; + snapshot!(code, @"200 OK"); + insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" + { + "hits": [ + { + "title": "Badman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman Returns", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 1", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Batman the dark knight returns: Part 2", + "_federation": { + "indexUid": "batman", + "queriesPosition": 1, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + 
"indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Captain Marvel", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Escape Room", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Gläss", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies", + "queriesPosition": 0, + "weightedRankingScore": 1.0 + } + }, + { + "title": "Shazam!", + "_federation": { + "indexUid": "movies-2", + "queriesPosition": 2, + "weightedRankingScore": 1.0 + } + } + ], + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 15, + "facetDistribution": { + "color": { + "blue": 3, + "green": 2, + "red": 3 + }, + "title": { + "Badman": 1, + "Batman": 1, + "Batman Returns": 1 + } + }, + "facetStats": {} + } + "###); +} diff --git a/meilisearch/tests/similar/errors.rs b/meilisearch/tests/similar/errors.rs index d0be6562f..228358d54 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/meilisearch/tests/similar/errors.rs @@ -18,7 +18,7 @@ async fn similar_unexisting_index() { }); index - .similar(json!({"id": 287947}), |response, code| { + .similar(json!({"id": 
287947, "embedder": "manual"}), |response, code| { assert_eq!(code, 404); assert_eq!(response, expected_response); }) @@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() { let server = Server::new().await; let index = server.index("test"); - let (response, code) = index.similar_post(json!({"id": 287947})).await; + let (response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -199,7 +199,8 @@ async fn similar_not_found_id() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await; + let (response, code) = + index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -230,7 +231,8 @@ async fn similar_bad_offset() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -241,7 +243,7 @@ async fn similar_bad_offset() { } "###); - let (response, code) = index.similar_get("?id=287947&offset=doggo").await; + let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -272,7 +274,8 @@ async fn similar_bad_limit() { snapshot!(code, @"202 Accepted"); server.wait_task(response.uid()).await; - let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad 
Request"); snapshot!(json_string!(response), @r###" { @@ -283,7 +286,7 @@ async fn similar_bad_limit() { } "###); - let (response, code) = index.similar_get("?id=287946&limit=doggo").await; + let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -323,7 +326,8 @@ async fn similar_bad_filter() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await; - let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await; + let (response, code) = + index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -361,10 +365,10 @@ async fn filter_invalid_syntax_object() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| { + .similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" @@ -400,10 +404,10 @@ async fn filter_invalid_syntax_array() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| { + 
.similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| { snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" @@ -446,7 +450,7 @@ async fn filter_invalid_syntax_string() { }); index .similar( - json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}), + json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}), |response, code| { assert_eq!(response, expected_response); assert_eq!(code, 400); @@ -486,10 +490,13 @@ async fn filter_invalid_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -524,10 +531,13 @@ async fn filter_invalid_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 
287947, "filter": "many = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -562,10 +572,13 @@ async fn filter_reserved_geo_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -600,10 +613,13 @@ async fn filter_reserved_geo_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -638,10 +654,13 @@ async fn filter_reserved_attribute_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -676,10 +695,13 @@ async fn filter_reserved_attribute_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + 
json!({"id": 287947, "filter": "_geoDistance = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -714,10 +736,13 @@ async fn filter_reserved_geo_point_array() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -752,10 +777,13 @@ async fn filter_reserved_geo_point_string() { "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" }); index - .similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) + .similar( + json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}), + |response, code| { + assert_eq!(response, expected_response); + assert_eq!(code, 400); + }, + ) .await; } @@ -765,7 +793,8 @@ async fn similar_bad_retrieve_vectors() { server.set_features(json!({"vectorStore": true})).await; let index = server.index("test"); - let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await; + let (response, code) = + index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -776,7 +805,8 @@ async fn similar_bad_retrieve_vectors() { } "###); - let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await; + let (response, code) = + index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { diff --git 
a/meilisearch/tests/similar/mod.rs b/meilisearch/tests/similar/mod.rs index b4c95b059..fa0797a41 100644 --- a/meilisearch/tests/similar/mod.rs +++ b/meilisearch/tests/similar/mod.rs @@ -80,9 +80,11 @@ async fn basic() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "Escape Room", @@ -154,13 +156,16 @@ async fn basic() { } ] "###); - }) + }, + ) .await; index - .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "How to Train Your Dragon: The Hidden World", @@ -232,7 +237,8 @@ async fn basic() { } ] "###); - }) + }, + ) .await; } @@ -272,7 +278,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4"); @@ -358,7 +364,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); 
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3"); @@ -426,7 +432,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2"); @@ -476,7 +482,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1"); @@ -508,7 +514,7 @@ async fn ranking_score_threshold() { index .similar( - json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}), + json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @"[]"); @@ -553,7 +559,7 @@ async fn filter() { index .similar( - json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}), + json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" @@ -617,7 +623,7 @@ async fn filter() { index .similar( - json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}), + json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); 
snapshot!(json_string!(response["hits"]), @r###" @@ -681,9 +687,11 @@ async fn limit_and_offset() { index.wait_task(value.uid()).await; index - .similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(json_string!(response["hits"]), @r###" + .similar( + json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" [ { "title": "Escape Room", @@ -704,12 +712,13 @@ async fn limit_and_offset() { } ] "###); - }) + }, + ) .await; index .similar( - json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}), + json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), @r###" diff --git a/meilisearch/tests/vector/binary_quantized.rs b/meilisearch/tests/vector/binary_quantized.rs new file mode 100644 index 000000000..d3fe3c824 --- /dev/null +++ b/meilisearch/tests/vector/binary_quantized.rs @@ -0,0 +1,380 @@ +use meili_snap::{json_string, snapshot}; + +use crate::common::{GetAllDocumentsOptions, Server}; +use crate::json; +use crate::vector::generate_default_user_provided_documents; + +#[actix_rt::test] +async fn retrieve_binary_quantize_status_in_the_settings() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + 
+ let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (settings, code) = index.settings().await; + snapshot!(code, @"200 OK"); + snapshot!(settings["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###); +} + +#[actix_rt::test] +async fn binary_quantize_before_sending_documents() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, + {"id": 1, "name": "echo", "_vectors": { 
"manual": [2.5, 1.5, -130] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await.succeeded(); + + // Make sure the documents are binary quantized + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "manual": { + "embeddings": [ + [ + -1.0, + -1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "manual": { + "embeddings": [ + [ + 1.0, + 1.0, + -1.0 + ] + ], + "regenerate": false + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); +} + +#[actix_rt::test] +async fn binary_quantize_after_sending_documents() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let documents = json!([ + {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }}, + {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await.succeeded(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) 
+ .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + // Make sure the documents are binary quantized + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "manual": { + "embeddings": [ + [ + -1.0, + -1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "manual": { + "embeddings": [ + [ + 1.0, + 1.0, + -1.0 + ] + ], + "regenerate": false + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 2 + } + "###); +} + +#[actix_rt::test] +async fn try_to_disable_binary_quantization() { + let server = Server::new().await; + let index = server.index("doggo"); + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let ret = server.wait_task(response.uid()).await; + snapshot!(ret, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "manual": { + "source": "userProvided", + "dimensions": 3, + "binaryQuantized": false + } + } + }, + "error": { + "message": 
"`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_rt::test] +async fn binary_quantize_clear_documents() { + let server = Server::new().await; + let index = generate_default_user_provided_documents(&server).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "manual": { + "binaryQuantized": true, + } + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let (value, _code) = index.clear_all_documents().await; + index.wait_task(value.uid()).await.succeeded(); + + // Make sure the documents DB has been cleared + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [], + "offset": 0, + "limit": 20, + "total": 0 + } + "###); + + // Make sure the arroy DB has been cleared + let (documents, _code) = + index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await; + snapshot!(documents, @r###" + { + "hits": [], + "query": "", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0, + "semanticHitCount": 0 + } + "###); +} diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 7c9b375d9..47d0c1051 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -1,3 +1,4 @@ +mod binary_quantized; mod openai; mod rest; mod settings; @@ -624,7 +625,8 @@ async fn 
clear_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await; snapshot!(documents, @r###" { "hits": [], @@ -685,7 +687,11 @@ async fn add_remove_one_vector_4588() { let task = index.wait_task(value.uid()).await; snapshot!(task, name: "document-deleted"); - let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await; + let (documents, _code) = index + .search_post( + json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }), + ) + .await; snapshot!(documents, @r###" { "hits": [ diff --git a/meilisearch/tests/vector/openai.rs b/meilisearch/tests/vector/openai.rs index 2ede7df15..04c068c40 100644 --- a/meilisearch/tests/vector/openai.rs +++ b/meilisearch/tests/vector/openai.rs @@ -449,7 +449,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, })) .await; snapshot!(code, @"200 OK"); @@ -489,7 +489,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -529,7 +529,7 @@ async fn it_works() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -616,7 +616,7 @@ async fn tokenize_long_text() { "q": "grand chien de berger des montagnes", "showRankingScore": true, "attributesToRetrieve": ["id"], - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ 
-1064,7 +1064,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1104,7 +1104,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1144,7 +1144,7 @@ async fn smaller_dimensions() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1295,7 +1295,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1335,7 +1335,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1375,7 +1375,7 @@ async fn small_embedding_model() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1525,7 +1525,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1565,7 +1565,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "petit chien", - 
"hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1605,7 +1605,7 @@ async fn legacy_embedding_model() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1756,7 +1756,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "chien de chasse", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1796,7 +1796,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "petit chien", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); @@ -1836,7 +1836,7 @@ async fn it_still_works() { let (response, code) = index .search_post(json!({ "q": "grand chien de berger des montagnes", - "hybrid": {"semanticRatio": 1.0} + "hybrid": {"semanticRatio": 1.0, "embedder": "default"} })) .await; snapshot!(code, @"200 OK"); diff --git a/meilisearch/tests/vector/settings.rs b/meilisearch/tests/vector/settings.rs index 0714a22ca..4f07ca18b 100644 --- a/meilisearch/tests/vector/settings.rs +++ b/meilisearch/tests/vector/settings.rs @@ -218,7 +218,8 @@ async fn reset_embedder_documents() { "###); // Make sure the arroy DB has been cleared - let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; + let (documents, _code) = + index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await; snapshot!(json_string!(documents), @r###" { "message": "Cannot find embedder with name `default`.", diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 79b61b4f1..5fc2d65c8 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -17,7 +17,7 @@ bincode = "1.3.3" bstr = 
"1.9.1" bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } byteorder = "1.5.0" -charabia = { version = "0.9.0", default-features = false } +charabia = { version = "0.9.1", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.13" deserr = "0.6.2" @@ -80,7 +80,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", tiktoken-rs = "0.5.9" liquid = "0.26.6" rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } -arroy = "0.4.0" +arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" } rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.10.0", features = ["json"] } @@ -106,6 +106,8 @@ all-tokenizations = [ "charabia/greek", "charabia/khmer", "charabia/vietnamese", + "charabia/swedish-recomposition", + "charabia/german-segmentation", ] # Use POSIX semaphores instead of SysV semaphores in LMDB @@ -138,6 +140,9 @@ khmer = ["charabia/khmer"] # allow vietnamese specialized tokenization vietnamese = ["charabia/vietnamese"] +# allow german specialized tokenization +german = ["charabia/german-segmentation"] + # force swedish character recomposition swedish-recomposition = ["charabia/swedish-recomposition"] diff --git a/milli/src/error.rs b/milli/src/error.rs index f0e92a9ab..400d3d3be 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -258,6 +258,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco }, #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] InvalidSettingsDimensions { embedder_name: String }, + #[error( + "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors." 
+ )] + InvalidDisableBinaryQuantization { embedder_name: String }, #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String }, #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] diff --git a/milli/src/index.rs b/milli/src/index.rs index 512e911aa..c47896df7 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec}; use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; -use crate::vector::{Embedding, EmbeddingConfig}; +use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -162,7 +162,7 @@ pub struct Index { /// Maps an embedder name to its id in the arroy store. pub embedder_category_id: Database, /// Vector store based on arroy™. - pub vector_arroy: arroy::Database, + pub vector_arroy: arroy::Database, /// Maps the document id to the document as an obkv store. 
pub(crate) documents: Database, @@ -1614,15 +1614,17 @@ impl Index { &'a self, rtxn: &'a RoTxn<'a>, embedder_id: u8, - ) -> impl Iterator>> + 'a { + quantized: bool, + ) -> impl Iterator> + 'a { crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { - arroy::Reader::open(rtxn, k, self.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e.into()), - }) - .transpose() + let reader = ArroyWrapper::new(self.vector_arroy, k, quantized); + // Here we don't care about the dimensions, but we want to know if we can read + // in the database or if its metadata are missing because there is no document with that many vectors. + match reader.dimensions(rtxn) { + Ok(_) => Some(Ok(reader)), + Err(arroy::Error::MissingMetadata(_)) => None, + Err(e) => Some(Err(e.into())), + } }) } @@ -1644,32 +1646,18 @@ impl Index { docid: DocumentId, ) -> Result>> { let mut res = BTreeMap::new(); - for row in self.embedder_category_id.iter(rtxn)? 
{ - let (embedder_name, embedder_id) = row?; - let embedder_id = (embedder_id as u16) << 8; - let mut embeddings = Vec::new(); - 'vectors: for i in 0..=u8::MAX { - let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy) - .map(Some) - .or_else(|e| match e { - arroy::Error::MissingMetadata(_) => Ok(None), - e => Err(e), - }) - .transpose(); - - let Some(reader) = reader else { - break 'vectors; - }; - - let embedding = reader?.item_vector(rtxn, docid)?; - if let Some(embedding) = embedding { - embeddings.push(embedding) - } else { - break 'vectors; - } - } - - res.insert(embedder_name.to_owned(), embeddings); + let embedding_configs = self.embedding_configs(rtxn)?; + for config in embedding_configs { + let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap(); + let embeddings = self + .arroy_readers(rtxn, embedder_id, config.config.quantized()) + .map_while(|reader| { + reader + .and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into())) + .transpose() + }) + .collect::>>()?; + res.insert(config.name.to_owned(), embeddings); } Ok(res) } diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 62ae05740..a63bb634b 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -1,4 +1,5 @@ use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::Display; use std::ops::ControlFlow; use std::{fmt, mem}; @@ -37,6 +38,15 @@ pub enum OrderBy { Count, } +impl Display for OrderBy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + OrderBy::Lexicographic => f.write_str("alphabetically"), + OrderBy::Count => f.write_str("by count"), + } + } +} + pub struct FacetDistribution<'a> { facets: Option>, candidates: Option, @@ -100,7 +110,6 @@ impl<'a> FacetDistribution<'a> { let mut lexicographic_distribution = BTreeMap::new(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); - 
let distribution_prelength = distribution.len(); let db = self.index.field_id_docid_facet_f64s; for docid in candidates { key_buffer.truncate(mem::size_of::()); @@ -113,23 +122,21 @@ impl<'a> FacetDistribution<'a> { for result in iter { let ((_, _, value), ()) = result?; *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1; - - if lexicographic_distribution.len() - distribution_prelength - == self.max_values_per_facet - { - break; - } } } - distribution.extend(lexicographic_distribution); + distribution.extend( + lexicographic_distribution + .into_iter() + .take(self.max_values_per_facet.saturating_sub(distribution.len())), + ); } FacetType::String => { let mut normalized_distribution = BTreeMap::new(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); let db = self.index.field_id_docid_facet_strings; - 'outer: for docid in candidates { + for docid in candidates { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db @@ -144,14 +151,14 @@ impl<'a> FacetDistribution<'a> { .or_insert_with(|| (original_value, 0)); *count += 1; - if normalized_distribution.len() == self.max_values_per_facet { - break 'outer; - } + // we'd like to break here if we have enough facet values, but we are collecting them by increasing docid, + // so higher ranked facets could be in later docids } } let iter = normalized_distribution .into_iter() + .take(self.max_values_per_facet.saturating_sub(distribution.len())) .map(|(_normalized, (original, count))| (original.to_string(), count)); distribution.extend(iter); } @@ -467,7 +474,7 @@ mod tests { .execute() .unwrap(); - milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###); + milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###); let map = FacetDistribution::new(&txn, &index) .facets(iter::once(("colour", OrderBy::Count))) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 9ce201aca..c059d2d27 100644 
--- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -12,7 +12,7 @@ use serde_json::Value; use super::facet_range_search; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ - FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, + FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ @@ -336,6 +336,24 @@ impl<'a> Filter<'a> { return Ok(docids); } + Condition::StartsWith { keyword: _, word } => { + let value = crate::normalize_facet(word.value()); + let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() }; + let docids = strings_db + .prefix_iter(rtxn, &base)? + .map(|result| -> Result { + match result { + Ok((_facet_group_key, FacetGroupValue { bitmap, .. })) => Ok(bitmap), + Err(_e) => Err(InternalError::from(SerializationError::Decoding { + db_name: Some(FACET_ID_STRING_DOCIDS), + }) + .into()), + } + }) + .union()?; + + return Ok(docids); + } }; let mut output = RoaringBitmap::new(); diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index e08111473..8b274804c 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -190,7 +190,7 @@ impl<'a> Search<'a> { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; // no embedder, no semantic search - let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { + let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else { return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); }; @@ -212,7 +212,7 @@ impl<'a> Search<'a> { }; search.semantic = - Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder }); + Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized }); // TODO: would be better to have two distinct functions at this point let vector_results = 
search.execute()?; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 3057066d2..d5b05f515 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -32,6 +32,7 @@ pub struct SemanticSearch { vector: Option>, embedder_name: String, embedder: Arc, + quantized: bool, } pub struct Search<'a> { @@ -89,9 +90,10 @@ impl<'a> Search<'a> { &mut self, embedder_name: String, embedder: Arc, + quantized: bool, vector: Option>, ) -> &mut Search<'a> { - self.semantic = Some(SemanticSearch { embedder_name, embedder, vector }); + self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector }); self } @@ -206,7 +208,7 @@ impl<'a> Search<'a> { degraded, used_negative_operator, } = match self.semantic.as_ref() { - Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => { + Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => { execute_vector_search( &mut ctx, vector, @@ -219,6 +221,7 @@ impl<'a> Search<'a> { self.limit, embedder_name, embedder, + *quantized, self.time_budget.clone(), self.ranking_score_threshold, )? 
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index b30306a0b..f7c590360 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -312,6 +312,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( Ok(ranking_rules) } +#[allow(clippy::too_many_arguments)] fn get_ranking_rules_for_vector<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, @@ -320,6 +321,7 @@ fn get_ranking_rules_for_vector<'ctx>( target: &[f32], embedder_name: &str, embedder: &Embedder, + quantized: bool, ) -> Result>> { // query graph search @@ -347,6 +349,7 @@ fn get_ranking_rules_for_vector<'ctx>( limit_plus_offset, embedder_name, embedder, + quantized, )?; ranking_rules.push(Box::new(vector_sort)); vector = true; @@ -576,6 +579,7 @@ pub fn execute_vector_search( length: usize, embedder_name: &str, embedder: &Embedder, + quantized: bool, time_budget: TimeBudget, ranking_score_threshold: Option, ) -> Result { @@ -591,6 +595,7 @@ pub fn execute_vector_search( vector, embedder_name, embedder, + quantized, )?; let mut placeholder_search_logger = logger::DefaultSearchLogger; diff --git a/milli/src/search/new/vector_sort.rs b/milli/src/search/new/vector_sort.rs index e56f3cbbe..de1dacbe7 100644 --- a/milli/src/search/new/vector_sort.rs +++ b/milli/src/search/new/vector_sort.rs @@ -16,6 +16,7 @@ pub struct VectorSort { limit: usize, distribution_shift: Option, embedder_index: u8, + quantized: bool, } impl VectorSort { @@ -26,6 +27,7 @@ impl VectorSort { limit: usize, embedder_name: &str, embedder: &Embedder, + quantized: bool, ) -> Result { let embedder_index = ctx .index @@ -41,6 +43,7 @@ impl VectorSort { limit, distribution_shift: embedder.distribution(), embedder_index, + quantized, }) } @@ -49,16 +52,12 @@ impl VectorSort { ctx: &mut SearchContext<'_>, vector_candidates: &RoaringBitmap, ) -> Result<()> { - let readers: std::result::Result, _> = - ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect(); - let readers = readers?; - let 
target = &self.target; let mut results = Vec::new(); - for reader in readers.iter() { + for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) { let nns_by_vector = - reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?; + reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; results.extend(nns_by_vector.into_iter()); } results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); diff --git a/milli/src/search/similar.rs b/milli/src/search/similar.rs index bf5cc323f..0cb8d723d 100644 --- a/milli/src/search/similar.rs +++ b/milli/src/search/similar.rs @@ -18,9 +18,11 @@ pub struct Similar<'a> { embedder_name: String, embedder: Arc, ranking_score_threshold: Option, + quantized: bool, } impl<'a> Similar<'a> { + #[allow(clippy::too_many_arguments)] pub fn new( id: DocumentId, offset: usize, @@ -29,6 +31,7 @@ impl<'a> Similar<'a> { rtxn: &'a heed::RoTxn<'a>, embedder_name: String, embedder: Arc, + quantized: bool, ) -> Self { Self { id, @@ -40,6 +43,7 @@ impl<'a> Similar<'a> { embedder_name, embedder, ranking_score_threshold: None, + quantized, } } @@ -67,19 +71,13 @@ impl<'a> Similar<'a> { .get(self.rtxn, &self.embedder_name)? 
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; - let readers: std::result::Result, _> = - self.index.arroy_readers(self.rtxn, embedder_index).collect(); - - let readers = readers?; - let mut results = Vec::new(); - for reader in readers.iter() { - let nns_by_item = reader.nns_by_item( + for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) { + let nns_by_item = reader?.nns_by_item( self.rtxn, self.id, self.limit + self.offset + 1, - None, Some(&universe), )?; if let Some(mut nns_by_item) = nns_by_item { diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index e9b83b92c..38a4ebe8a 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -20,7 +20,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; -use crate::vector::settings::{EmbedderAction, ReindexAction}; +use crate::vector::settings::ReindexAction; use crate::vector::{Embedder, Embeddings}; use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; @@ -208,65 +208,65 @@ pub fn extract_vector_points( if reindex_vectors { for (name, action) in settings_diff.embedding_config_updates.iter() { - match action { - EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted - EmbedderAction::Reindex(action) => { - let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name) - else { - tracing::error!(embedder = name, "Requested embedder config not found"); - continue; - }; + if let Some(action) = action.reindex() { + let Some((embedder_name, (embedder, 
prompt, _quantized))) = + configs.remove_entry(name) + else { + tracing::error!(embedder = name, "Requested embedder config not found"); + continue; + }; - // (docid, _index) -> KvWriterDelAdd -> Vector - let manual_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid, _index) -> KvWriterDelAdd -> Vector + let manual_vectors_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - // (docid) -> (prompt) - let prompts_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid) -> (prompt) + let prompts_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - // (docid) -> () - let remove_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid) -> () + let remove_vectors_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - let action = match action { - ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, - ReindexAction::RegeneratePrompts => { - let Some((_, old_prompt)) = old_configs.get(name) else { - tracing::error!(embedder = name, "Old embedder config not found"); - continue; - }; + let action = match action { + ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, + ReindexAction::RegeneratePrompts => { + let Some((_, old_prompt, _quantized)) = old_configs.get(name) else { + tracing::error!(embedder = name, "Old embedder config not found"); + continue; + }; - ExtractionAction::SettingsRegeneratePrompts { old_prompt } - } - }; + ExtractionAction::SettingsRegeneratePrompts { old_prompt } + } + }; - extractors.push(EmbedderVectorExtractor { - embedder_name, - embedder, - prompt, - 
prompts_writer, - remove_vectors_writer, - manual_vectors_writer, - add_to_user_provided: RoaringBitmap::new(), - action, - }); - } + extractors.push(EmbedderVectorExtractor { + embedder_name, + embedder, + prompt, + prompts_writer, + remove_vectors_writer, + manual_vectors_writer, + add_to_user_provided: RoaringBitmap::new(), + action, + }); + } else { + continue; } } } else { // document operation - for (embedder_name, (embedder, prompt)) in configs.into_iter() { + for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() { // (docid, _index) -> KvWriterDelAdd -> Vector let manual_vectors_writer = create_writer( indexer.chunk_compression_type, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 6d659a7a2..326dd842d 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs; use crate::update::{ IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, }; -use crate::vector::EmbeddingConfigs; +use crate::vector::{ArroyWrapper, EmbeddingConfigs}; use crate::{CboRoaringBitmapCodec, Index, Object, Result}; static MERGED_DATABASE_COUNT: usize = 7; @@ -679,6 +679,24 @@ where let number_of_documents = self.index.number_of_documents(self.wtxn)?; let mut rng = rand::rngs::StdRng::seed_from_u64(42); + // If an embedder wasn't used in the typedchunk but must be binary quantized + // we should insert it in `dimension` + for (name, action) in settings_diff.embedding_config_updates.iter() { + if action.is_being_quantized && !dimension.contains_key(name.as_str()) { + let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or( + InternalError::DatabaseMissingEntry { + db_name: "embedder_category_id", + key: None, + }, + )?; + let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap(); + let reader = + 
ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized); + let dim = reader.dimensions(self.wtxn)?; + dimension.insert(name.to_string(), dim); + } + } + for (embedder_name, dimension) in dimension { let wtxn = &mut *self.wtxn; let vector_arroy = self.index.vector_arroy; @@ -686,13 +704,23 @@ where let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; + let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name); + let was_quantized = settings_diff + .old + .embedding_configs + .get(&embedder_name) + .map_or(false, |conf| conf.2); + let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized); pool.install(|| { for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { - let writer = arroy::Writer::new(vector_arroy, k, dimension); - if writer.need_build(wtxn)? { - writer.build(wtxn, &mut rng, None)?; - } else if writer.is_empty(wtxn)? { + let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized); + if is_quantizing { + writer.quantize(wtxn, k, dimension)?; + } + if writer.need_build(wtxn, dimension)? { + writer.build(wtxn, &mut rng, dimension)?; + } else if writer.is_empty(wtxn, dimension)? 
{ break; } } @@ -2746,6 +2774,7 @@ mod tests { response: Setting::NotSet, distribution: Setting::NotSet, headers: Setting::NotSet, + binary_quantized: Setting::NotSet, }), ); settings.set_embedder_settings(embedders); @@ -2774,7 +2803,7 @@ mod tests { std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap()); let res = index .search(&rtxn) - .semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec())) + .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec())) .execute() .unwrap(); assert_eq!(res.documents_ids.len(), 3); diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 73fa3ca7b..bb2cfe56c 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -28,7 +28,8 @@ use crate::update::index_documents::GrenadParameters; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; -use crate::vector::settings::{EmbedderAction, WriteBackToDocuments}; +use crate::vector::settings::WriteBackToDocuments; +use crate::vector::ArroyWrapper; use crate::{ is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, }; @@ -989,19 +990,17 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let readers: Result< - BTreeMap<&str, (Vec>, &RoaringBitmap)>, - > = settings_diff + let readers: Result, &RoaringBitmap)>> = settings_diff .embedding_config_updates .iter() .filter_map(|(name, action)| { - if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }) = action + if let Some(WriteBackToDocuments { embedder_id, user_provided }) = + action.write_back() { - let readers: Result> = - self.index.arroy_readers(wtxn, *embedder_id).collect(); + let readers: Result> = self + .index + 
.arroy_readers(wtxn, *embedder_id, action.was_quantized) + .collect(); match readers { Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), Err(error) => Some(Err(error)), @@ -1104,23 +1103,14 @@ impl<'a, 'i> Transform<'a, 'i> { } } - let mut writers = Vec::new(); - // delete all vectors from the embedders that need removal for (_, (readers, _)) in readers { for reader in readers { - let dimensions = reader.dimensions(); - let arroy_index = reader.index(); - drop(reader); - let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions); - writers.push(writer); + let dimensions = reader.dimensions(wtxn)?; + reader.clear(wtxn, dimensions)?; } } - for writer in writers { - writer.clear(wtxn)?; - } - let grenad_params = GrenadParameters { chunk_compression_type: self.indexer_settings.chunk_compression_type, chunk_compression_level: self.indexer_settings.chunk_compression_level, diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 9de95778b..97a4bf712 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -27,6 +27,7 @@ use crate::update::index_documents::helpers::{ as_cloneable_grenad, keep_latest_obkv, try_split_array_at, }; use crate::update::settings::InnerIndexSettingsDiff; +use crate::vector::ArroyWrapper; use crate::{ lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, Result, SerializationError, U8StrStrCodec, @@ -666,9 +667,14 @@ pub(crate) fn write_typed_chunk_into_index( let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, )?; + let binary_quantized = settings_diff + .old + .embedding_configs + .get(&embedder_name) + .map_or(false, |conf| conf.2); // FIXME: allow customizing distance let writers: Vec<_> = 
crate::vector::arroy_db_range_for_embedder(embedder_index) - .map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension)) + .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized)) .collect(); // remove vectors for docids we want them removed @@ -679,7 +685,7 @@ pub(crate) fn write_typed_chunk_into_index( for writer in &writers { // Uses invariant: vectors are packed in the first writers. - if !writer.del_item(wtxn, docid)? { + if !writer.del_item(wtxn, expected_dimension, docid)? { break; } } @@ -711,7 +717,7 @@ pub(crate) fn write_typed_chunk_into_index( ))); } for (embedding, writer) in embeddings.iter().zip(&writers) { - writer.add_item(wtxn, docid, embedding)?; + writer.add_item(wtxn, expected_dimension, docid, embedding)?; } } @@ -734,7 +740,7 @@ pub(crate) fn write_typed_chunk_into_index( break; }; if candidate == vector { - writer.del_item(wtxn, docid)?; + writer.del_item(wtxn, expected_dimension, docid)?; deleted_index = Some(index); } } @@ -751,8 +757,13 @@ pub(crate) fn write_typed_chunk_into_index( if let Some((last_index, vector)) = last_index_with_a_vector { // unwrap: computed the index from the list of writers let writer = writers.get(last_index).unwrap(); - writer.del_item(wtxn, docid)?; - writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?; + writer.del_item(wtxn, expected_dimension, docid)?; + writers.get(deleted_index).unwrap().add_item( + wtxn, + expected_dimension, + docid, + &vector, + )?; } } } @@ -762,8 +773,8 @@ pub(crate) fn write_typed_chunk_into_index( // overflow was detected during vector extraction. for writer in &writers { - if !writer.contains_item(wtxn, docid)? { - writer.add_item(wtxn, docid, &vector)?; + if !writer.contains_item(wtxn, expected_dimension, docid)? 
{ + writer.add_item(wtxn, expected_dimension, docid, &vector)?; break; } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 8702e7ea6..6e2b53d58 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -954,7 +954,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let old_configs = self.index.embedding_configs(self.wtxn)?; let remove_all: Result> = old_configs .into_iter() - .map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> { + .map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> { let embedder_id = self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or( crate::InternalError::DatabaseMissingEntry { @@ -964,10 +964,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { )?; Ok(( name, - EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }), + EmbedderAction::with_write_back( + WriteBackToDocuments { embedder_id, user_provided }, + config.quantized(), + ), )) }) .collect(); @@ -1004,7 +1004,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { match joined { // updated config EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => { - let settings_diff = SettingsDiff::from_settings(old, new); + let was_quantized = old.binary_quantized.set().unwrap_or_default(); + let settings_diff = SettingsDiff::from_settings(&name, old, new)?; match settings_diff { SettingsDiff::Remove => { tracing::debug!( @@ -1023,25 +1024,29 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.index.embedder_category_id.delete(self.wtxn, &name)?; embedder_actions.insert( name, - EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { - embedder_id, - user_provided, - }), + EmbedderAction::with_write_back( + WriteBackToDocuments { embedder_id, user_provided }, + was_quantized, + ), ); } - SettingsDiff::Reindex { action, updated_settings } => { + SettingsDiff::Reindex { action, updated_settings, quantize } => { tracing::debug!( embedder = name, user_provided = 
user_provided.len(), ?action, "reindex embedder" ); - embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action)); + embedder_actions.insert( + name.clone(), + EmbedderAction::with_reindex(action, was_quantized) + .with_is_being_quantized(quantize), + ); let new = validate_embedding_settings(Setting::Set(updated_settings), &name)?; updated_configs.insert(name, (new, user_provided)); } - SettingsDiff::UpdateWithoutReindex { updated_settings } => { + SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => { tracing::debug!( embedder = name, user_provided = user_provided.len(), @@ -1049,6 +1054,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { ); let new = validate_embedding_settings(Setting::Set(updated_settings), &name)?; + if quantize { + embedder_actions.insert( + name.clone(), + EmbedderAction::default().with_is_being_quantized(true), + ); + } updated_configs.insert(name, (new, user_provided)); } } @@ -1067,8 +1078,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { &mut setting, ); let setting = validate_embedding_settings(setting, &name)?; - embedder_actions - .insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex)); + embedder_actions.insert( + name.clone(), + EmbedderAction::with_reindex(ReindexAction::FullReindex, false), + ); updated_configs.insert(name, (setting, RoaringBitmap::new())); } } @@ -1082,19 +1095,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let mut find_free_index = move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); for (name, action) in embedder_actions.iter() { - match action { - EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { - /* cannot be a new embedder, so has to have an id already */ - } - EmbedderAction::Reindex(ReindexAction::FullReindex) => { - if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() { - let id = find_free_index() - .ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; - tracing::debug!(embedder = name, id, "assigning free id to 
new embedder"); - self.index.embedder_category_id.put(self.wtxn, name, &id)?; - } - } - EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ } + // ignore actions that are not possible for a new embedder + if matches!(action.reindex(), Some(ReindexAction::FullReindex)) + && self.index.embedder_category_id.get(self.wtxn, name)?.is_none() + { + let id = + find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?; + tracing::debug!(embedder = name, id, "assigning free id to new embedder"); + self.index.embedder_category_id.put(self.wtxn, name, &id)?; } } let updated_configs: Vec = updated_configs @@ -1277,7 +1285,11 @@ impl InnerIndexSettingsDiff { // if the user-defined searchables changed, then we need to reindex prompts. if cache_user_defined_searchables { - for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() { + for (embedder_name, (config, _, _quantized)) in + new_settings.embedding_configs.inner_as_ref() + { + let was_quantized = + old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2); // skip embedders that don't use document templates if !config.uses_document_template() { continue; @@ -1287,16 +1299,19 @@ impl InnerIndexSettingsDiff { // this always makes the code clearer by explicitly handling the cases match embedding_config_updates.entry(embedder_name.clone()) { std::collections::btree_map::Entry::Vacant(entry) => { - entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts)); + entry.insert(EmbedderAction::with_reindex( + ReindexAction::RegeneratePrompts, + was_quantized, + )); + } + std::collections::btree_map::Entry::Occupied(entry) => { + let EmbedderAction { + was_quantized: _, + is_being_quantized: _, + write_back: _, // We are deleting this embedder, so no point in regeneration + reindex: _, // We are already fully reindexing + } = entry.get(); } - std::collections::btree_map::Entry::Occupied(entry) => match entry.get() { - 
EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */ - } - EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */ - } - EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */ - } - }, }; } } @@ -1546,7 +1561,7 @@ fn embedders(embedding_configs: Vec) -> Result) -> Result { let max_bytes = match document_template_max_bytes.set() { Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| { @@ -1613,6 +1629,7 @@ fn validate_prompt( response, distribution, headers, + binary_quantized: binary_quantize, })) } new => Ok(new), @@ -1638,6 +1655,7 @@ pub fn validate_embedding_settings( response, distribution, headers, + binary_quantized: binary_quantize, } = settings; if let Some(0) = dimensions.set() { @@ -1678,6 +1696,7 @@ pub fn validate_embedding_settings( response, distribution, headers, + binary_quantized: binary_quantize, })); }; match inferred_source { @@ -1779,6 +1798,7 @@ pub fn validate_embedding_settings( response, distribution, headers, + binary_quantized: binary_quantize, })) } diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 04e646819..d52e68bbe 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -1,8 +1,12 @@ use std::collections::HashMap; use std::sync::Arc; +use arroy::distances::{Angular, BinaryQuantizedAngular}; +use arroy::ItemId; use deserr::{DeserializeError, Deserr}; +use heed::{RoTxn, RwTxn, Unspecified}; use ordered_float::OrderedFloat; +use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use self::error::{EmbedError, NewEmbedderError}; @@ -26,6 +30,171 @@ pub type Embedding = Vec; pub const REQUEST_PARALLELISM: usize = 40; +pub struct ArroyWrapper { + quantized: bool, + index: u16, + database: arroy::Database, +} + +impl ArroyWrapper { + pub fn new(database: arroy::Database, index: u16, quantized: bool) -> Self { + Self { database, index, quantized } 
+ } + + pub fn index(&self) -> u16 { + self.index + } + + pub fn dimensions(&self, rtxn: &RoTxn) -> Result { + if self.quantized { + Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions()) + } else { + Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions()) + } + } + + pub fn quantize( + &mut self, + wtxn: &mut RwTxn, + index: u16, + dimension: usize, + ) -> Result<(), arroy::Error> { + if !self.quantized { + let writer = arroy::Writer::new(self.angular_db(), index, dimension); + writer.prepare_changing_distance::(wtxn)?; + self.quantized = true; + } + Ok(()) + } + + pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn) + } + } + + pub fn build( + &self, + wtxn: &mut RwTxn, + rng: &mut R, + dimension: usize, + ) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None) + } + } + + pub fn add_item( + &self, + wtxn: &mut RwTxn, + dimension: usize, + item_id: arroy::ItemId, + vector: &[f32], + ) -> Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension) + .add_item(wtxn, item_id, vector) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension) + .add_item(wtxn, item_id, vector) + } + } + + pub fn del_item( + &self, + wtxn: &mut RwTxn, + dimension: usize, + item_id: arroy::ItemId, + ) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id) + } + } + + pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> 
Result<(), arroy::Error> { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn) + } + } + + pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn) + } + } + + pub fn contains_item( + &self, + rtxn: &RoTxn, + dimension: usize, + item: arroy::ItemId, + ) -> Result { + if self.quantized { + arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, item) + } else { + arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item) + } + } + + pub fn nns_by_item( + &self, + rtxn: &RoTxn, + item: ItemId, + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result>, arroy::Error> { + if self.quantized { + arroy::Reader::open(rtxn, self.index, self.quantized_db())? + .nns_by_item(rtxn, item, limit, None, None, filter) + } else { + arroy::Reader::open(rtxn, self.index, self.angular_db())? + .nns_by_item(rtxn, item, limit, None, None, filter) + } + } + + pub fn nns_by_vector( + &self, + txn: &RoTxn, + item: &[f32], + limit: usize, + filter: Option<&RoaringBitmap>, + ) -> Result, arroy::Error> { + if self.quantized { + arroy::Reader::open(txn, self.index, self.quantized_db())? + .nns_by_vector(txn, item, limit, None, None, filter) + } else { + arroy::Reader::open(txn, self.index, self.angular_db())? 
+ .nns_by_vector(txn, item, limit, None, None, filter) + } + } + + pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result>, arroy::Error> { + if self.quantized { + arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid) + } else { + arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid) + } + } + + fn angular_db(&self) -> arroy::Database { + self.database.remap_data_type() + } + + fn quantized_db(&self) -> arroy::Database { + self.database.remap_data_type() + } +} + /// One or multiple embeddings stored consecutively in a flat vector. pub struct Embeddings { data: Vec, @@ -124,62 +293,48 @@ pub struct EmbeddingConfig { pub embedder_options: EmbedderOptions, /// Document template pub prompt: PromptData, + /// If this embedder is binary quantized + pub quantized: Option, // TODO: add metrics and anything needed } +impl EmbeddingConfig { + pub fn quantized(&self) -> bool { + self.quantized.unwrap_or_default() + } +} + /// Map of embedder configurations. /// /// Each configuration is mapped to a name. #[derive(Clone, Default)] -pub struct EmbeddingConfigs(HashMap, Arc)>); +pub struct EmbeddingConfigs(HashMap, Arc, bool)>); impl EmbeddingConfigs { /// Create the map from its internal component.s - pub fn new(data: HashMap, Arc)>) -> Self { + pub fn new(data: HashMap, Arc, bool)>) -> Self { Self(data) } /// Get an embedder configuration and template from its name. - pub fn get(&self, name: &str) -> Option<(Arc, Arc)> { + pub fn get(&self, name: &str) -> Option<(Arc, Arc, bool)> { self.0.get(name).cloned() } - /// Get the default embedder configuration, if any. 
- pub fn get_default(&self) -> Option<(Arc, Arc)> { - self.get(self.get_default_embedder_name()) - } - - pub fn inner_as_ref(&self) -> &HashMap, Arc)> { + pub fn inner_as_ref(&self) -> &HashMap, Arc, bool)> { &self.0 } - pub fn into_inner(self) -> HashMap, Arc)> { + pub fn into_inner(self) -> HashMap, Arc, bool)> { self.0 } - - /// Get the name of the default embedder configuration. - /// - /// The default embedder is determined as follows: - /// - /// - If there is only one embedder, it is always the default. - /// - If there are multiple embedders and one of them is called `default`, then that one is the default embedder. - /// - In all other cases, there is no default embedder. - pub fn get_default_embedder_name(&self) -> &str { - let mut it = self.0.keys(); - let first_name = it.next(); - let second_name = it.next(); - match (first_name, second_name) { - (None, _) => "default", - (Some(first), None) => first, - (Some(_), Some(_)) => "default", - } - } } impl IntoIterator for EmbeddingConfigs { - type Item = (String, (Arc, Arc)); + type Item = (String, (Arc, Arc, bool)); - type IntoIter = std::collections::hash_map::IntoIter, Arc)>; + type IntoIter = + std::collections::hash_map::IntoIter, Arc, bool)>; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index b7ae90d89..3bb7f09e6 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -32,6 +32,9 @@ pub struct EmbeddingSettings { pub dimensions: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + pub binary_quantized: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] pub document_template: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] @@ -85,23 +88,63 @@ pub enum ReindexAction { pub enum SettingsDiff { Remove, - Reindex { action: ReindexAction, updated_settings: EmbeddingSettings }, 
- UpdateWithoutReindex { updated_settings: EmbeddingSettings }, + Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool }, + UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool }, } -pub enum EmbedderAction { - WriteBackToDocuments(WriteBackToDocuments), - Reindex(ReindexAction), +#[derive(Default, Debug)] +pub struct EmbedderAction { + pub was_quantized: bool, + pub is_being_quantized: bool, + pub write_back: Option, + pub reindex: Option, } +impl EmbedderAction { + pub fn is_being_quantized(&self) -> bool { + self.is_being_quantized + } + + pub fn write_back(&self) -> Option<&WriteBackToDocuments> { + self.write_back.as_ref() + } + + pub fn reindex(&self) -> Option<&ReindexAction> { + self.reindex.as_ref() + } + + pub fn with_is_being_quantized(mut self, quantize: bool) -> Self { + self.is_being_quantized = quantize; + self + } + + pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self { + Self { + was_quantized, + is_being_quantized: false, + write_back: Some(write_back), + reindex: None, + } + } + + pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self { + Self { was_quantized, is_being_quantized: false, write_back: None, reindex: Some(reindex) } + } +} + +#[derive(Debug)] pub struct WriteBackToDocuments { pub embedder_id: u8, pub user_provided: RoaringBitmap, } impl SettingsDiff { - pub fn from_settings(old: EmbeddingSettings, new: Setting) -> Self { - match new { + pub fn from_settings( + embedder_name: &str, + old: EmbeddingSettings, + new: Setting, + ) -> Result { + let ret = match new { Setting::Set(new) => { let EmbeddingSettings { mut source, @@ -116,6 +159,7 @@ impl SettingsDiff { mut distribution, mut headers, mut document_template_max_bytes, + binary_quantized: mut binary_quantize, } = old; let EmbeddingSettings { @@ -131,8 +175,17 @@ impl SettingsDiff { distribution: new_distribution, headers: new_headers, document_template_max_bytes: 
new_document_template_max_bytes, + binary_quantized: new_binary_quantize, } = new; + if matches!(binary_quantize, Setting::Set(true)) + && matches!(new_binary_quantize, Setting::Set(false)) + { + return Err(UserError::InvalidDisableBinaryQuantization { + embedder_name: embedder_name.to_string(), + }); + } + let mut reindex_action = None; // **Warning**: do not use short-circuiting || here, we want all these operations applied @@ -172,6 +225,7 @@ impl SettingsDiff { _ => {} } } + let binary_quantize_changed = binary_quantize.apply(new_binary_quantize); if url.apply(new_url) { match source { // do not regenerate on an url change in OpenAI @@ -231,16 +285,27 @@ impl SettingsDiff { distribution, headers, document_template_max_bytes, + binary_quantized: binary_quantize, }; match reindex_action { - Some(action) => Self::Reindex { action, updated_settings }, - None => Self::UpdateWithoutReindex { updated_settings }, + Some(action) => Self::Reindex { + action, + updated_settings, + quantize: binary_quantize_changed, + }, + None => Self::UpdateWithoutReindex { + updated_settings, + quantize: binary_quantize_changed, + }, } } Setting::Reset => Self::Remove, - Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old }, - } + Setting::NotSet => { + Self::UpdateWithoutReindex { updated_settings: old, quantize: false } + } + }; + Ok(ret) } } @@ -486,7 +551,7 @@ impl std::fmt::Display for EmbedderSource { impl From for EmbeddingSettings { fn from(value: EmbeddingConfig) -> Self { - let EmbeddingConfig { embedder_options, prompt } = value; + let EmbeddingConfig { embedder_options, prompt, quantized } = value; let document_template_max_bytes = Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get()); match embedder_options { @@ -507,6 +572,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, 
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { url, @@ -527,6 +593,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { embedding_model, @@ -547,6 +614,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { dimensions, @@ -564,6 +632,7 @@ impl From for EmbeddingSettings { response: Setting::NotSet, headers: Setting::NotSet, distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), }, super::EmbedderOptions::Rest(super::rest::EmbedderOptions { api_key, @@ -586,6 +655,7 @@ impl From for EmbeddingSettings { response: Setting::Set(response), distribution: Setting::some_or_not_set(distribution), headers: Setting::Set(headers), + binary_quantized: Setting::some_or_not_set(quantized), }, } } @@ -607,8 +677,11 @@ impl From for EmbeddingConfig { response, distribution, headers, + binary_quantized, } = value; + this.quantized = binary_quantized.set(); + if let Some(source) = source.set() { match source { EmbedderSource::OpenAi => {