Merge branch 'main' into change-matches-position-phrase-search

This commit is contained in:
F. Levi 2024-09-27 15:52:05 +03:00
commit 00ccf53ffa
64 changed files with 6616 additions and 3694 deletions

16
Cargo.lock generated
View File

@ -387,14 +387,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]] [[package]]
name = "arroy" name = "arroy"
version = "0.4.0" version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "git+https://github.com/meilisearch/arroy/?rev=2386594dfb009ce08821a925ccc89fb8e30bf73d#2386594dfb009ce08821a925ccc89fb8e30bf73d"
checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"byteorder", "byteorder",
"heed", "heed",
"log", "log",
"memmap2", "memmap2",
"nohash",
"ordered-float", "ordered-float",
"rand", "rand",
"rayon", "rayon",
@ -933,9 +933,9 @@ dependencies = [
[[package]] [[package]]
name = "charabia" name = "charabia"
version = "0.9.0" version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03cd8f290cae94934cdd0103c14c2de9faf2d7d85be0d24d511af2bf1b14119d" checksum = "55ff52497324e7d168505a16949ae836c14595606fab94687238d2f6c8d4c798"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"csv", "csv",
@ -2838,7 +2838,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d" checksum = "e310b3a6b5907f99202fcdb4960ff45b93735d7c7d96b760fcff8db2dc0e103d"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
"windows-targets 0.48.1", "windows-targets 0.52.4",
] ]
[[package]] [[package]]
@ -3686,6 +3686,12 @@ version = "0.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e" checksum = "6d02c0b00610773bb7fc61d85e13d86c7858cbdf00e1a120bfc41bc055dbaa0e"
[[package]]
name = "nohash"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0f889fb66f7acdf83442c35775764b51fed3c606ab9cee51500dbde2cf528ca"
[[package]] [[package]]
name = "nom" name = "nom"
version = "7.1.3" version = "7.1.3"

View File

@ -45,14 +45,14 @@ See the list of all our example apps in our [demos repository](https://github.co
## ✨ Features ## ✨ Features
- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results - **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience - **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings - **[Typo tolerance](https://www.meilisearch.com/docs/learn/relevancy/typo_tolerance_settings?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/configuration/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results - **[Synonym support](https://www.meilisearch.com/docs/learn/relevancy/synonyms?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data - **[Geosearch](https://www.meilisearch.com/docs/learn/fine_tuning_results/geosearch?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet - **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling - **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants - **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs - **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **Easy to install, deploy, and maintain** - **Easy to install, deploy, and maintain**

View File

@ -255,6 +255,8 @@ pub(crate) mod test {
} }
"###); "###);
insta::assert_json_snapshot!(vector_index.settings().unwrap());
{ {
let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect(); let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect();
let mut documents = documents.unwrap(); let mut documents = documents.unwrap();

View File

@ -1,783 +1,56 @@
--- ---
source: dump/src/reader/mod.rs source: dump/src/reader/mod.rs
expression: document expression: vector_index.settings().unwrap()
--- ---
{ {
"id": "e3", "displayedAttributes": [
"desc": "overriden vector + map", "*"
"_vectors": {
"default": [
0.2,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
], ],
"toto": [ "searchableAttributes": [
0.1 "*"
] ],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"nonSeparatorTokens": [],
"separatorTokens": [],
"dictionary": [],
"synonyms": {},
"distinctAttribute": null,
"proximityPrecision": "byWord",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 5,
"twoTypos": 9
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100,
"sortFacetValuesBy": {
"*": "alpha"
} }
},
"pagination": {
"maxTotalHits": 1000
},
"embedders": {
"default": {
"source": "huggingFace",
"model": "BAAI/bge-base-en-v1.5",
"revision": "617ca489d9e86b49b8167676d8220688b99db36e",
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}"
}
},
"searchCutoffMs": null
} }

View File

@ -0,0 +1,780 @@
---
source: dump/src/reader/mod.rs
expression: document
---
{
"id": "e0",
"desc": "overriden vector",
"_vectors": {
"default": [
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1,
0.1
]
}
}

View File

@ -27,6 +27,7 @@ pub enum Condition<'a> {
LowerThanOrEqual(Token<'a>), LowerThanOrEqual(Token<'a>),
Between { from: Token<'a>, to: Token<'a> }, Between { from: Token<'a>, to: Token<'a> },
Contains { keyword: Token<'a>, word: Token<'a> }, Contains { keyword: Token<'a>, word: Token<'a> },
StartsWith { keyword: Token<'a>, word: Token<'a> },
} }
/// condition = value ("==" | ">" ...) value /// condition = value ("==" | ">" ...) value
@ -121,6 +122,34 @@ pub fn parse_not_contains(input: Span) -> IResult<FilterCondition> {
)) ))
} }
/// starts with = value "STARTS WITH" value
///
/// Parses a left-hand value, the literal `STARTS WITH` keyword, then the
/// right-hand value, and builds a `StartsWith` condition from them.
/// The right-hand value is wrapped in `cut`: once the keyword has matched, a
/// failure to parse it is reported as an unrecoverable error instead of letting
/// the caller backtrack into another alternative.
pub fn parse_starts_with(input: Span) -> IResult<FilterCondition> {
    let (input, (fid, starts_with, value)) =
        tuple((parse_value, tag("STARTS WITH"), cut(parse_value)))(input)?;

    Ok((
        input,
        FilterCondition::Condition {
            fid,
            // The keyword token only records where `STARTS WITH` was matched; it has no value.
            op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value },
        },
    ))
}
/// not starts with = value "NOT" WS+ "STARTS WITH" value
///
/// Parses a left-hand value, the `NOT` keyword, at least one whitespace
/// character, the literal `STARTS WITH` keyword, then the right-hand value.
/// The result is the same `StartsWith` condition that `value STARTS WITH value`
/// produces, wrapped in `FilterCondition::Not`.
pub fn parse_not_starts_with(input: Span) -> IResult<FilterCondition> {
    let keyword = tuple((tag("NOT"), multispace1, tag("STARTS WITH")));
    let (input, (fid, (_not, _spaces, starts_with), value)) =
        tuple((parse_value, keyword, cut(parse_value)))(input)?;

    Ok((
        input,
        FilterCondition::Not(Box::new(FilterCondition::Condition {
            fid,
            // Only the `STARTS WITH` span is kept as the keyword; the `NOT` and the
            // whitespace that follows it are discarded.
            op: StartsWith { keyword: Token { span: starts_with, value: None }, word: value },
        })),
    ))
}
/// to = value value "TO" WS+ value /// to = value value "TO" WS+ value
pub fn parse_to(input: Span) -> IResult<FilterCondition> { pub fn parse_to(input: Span) -> IResult<FilterCondition> {
let (input, (key, from, _, _, to)) = let (input, (key, from, _, _, to)) =

View File

@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> {
} }
ErrorKind::InvalidPrimary => { ErrorKind::InvalidPrimary => {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)? writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
} }
ErrorKind::InvalidEscapedNumber => { ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)? writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?

View File

@ -49,7 +49,7 @@ use std::fmt::Debug;
pub use condition::{parse_condition, parse_to, Condition}; pub use condition::{parse_condition, parse_to, Condition};
use condition::{ use condition::{
parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null,
parse_is_null, parse_not_contains, parse_not_exists, parse_is_null, parse_not_contains, parse_not_exists, parse_not_starts_with, parse_starts_with,
}; };
use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
pub use error::{Error, ErrorKind}; pub use error::{Error, ErrorKind};
@ -166,7 +166,8 @@ impl<'a> FilterCondition<'a> {
| Condition::LowerThan(_) | Condition::LowerThan(_)
| Condition::LowerThanOrEqual(_) | Condition::LowerThanOrEqual(_)
| Condition::Between { .. } => None, | Condition::Between { .. } => None,
Condition::Contains { keyword, word: _ } => Some(keyword), Condition::Contains { keyword, word: _ }
| Condition::StartsWith { keyword, word: _ } => Some(keyword),
}, },
FilterCondition::Not(this) => this.use_contains_operator(), FilterCondition::Not(this) => this.use_contains_operator(),
FilterCondition::Or(seq) | FilterCondition::And(seq) => { FilterCondition::Or(seq) | FilterCondition::And(seq) => {
@ -484,6 +485,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
parse_to, parse_to,
parse_contains, parse_contains,
parse_not_contains, parse_not_contains,
parse_starts_with,
parse_not_starts_with,
// the next lines are only for error handling and are written at the end to have the least possible performance impact // the next lines are only for error handling and are written at the end to have the least possible performance impact
parse_geo, parse_geo,
parse_geo_distance, parse_geo_distance,
@ -567,6 +570,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
Condition::Between { from, to } => write!(f, "{from} TO {to}"), Condition::Between { from, to } => write!(f, "{from} TO {to}"),
Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"), Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"),
Condition::StartsWith { word, keyword: _ } => write!(f, "STARTS WITH {word}"),
} }
} }
} }
@ -680,6 +684,13 @@ pub mod tests {
insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}"); insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})"); insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");
// Test STARTS WITH + NOT STARTS WITH
insta::assert_snapshot!(p("subscribers STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}");
insta::assert_snapshot!(p("NOT subscribers STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})");
insta::assert_snapshot!(p("subscribers NOT STARTS WITH hel"), @"NOT ({subscribers} STARTS WITH {hel})");
insta::assert_snapshot!(p("NOT subscribers NOT STARTS WITH 'hel'"), @"{subscribers} STARTS WITH {hel}");
insta::assert_snapshot!(p("subscribers NOT STARTS WITH 'hel'"), @"NOT ({subscribers} STARTS WITH {hel})");
// Test nested NOT // Test nested NOT
insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
@ -751,7 +762,7 @@ pub mod tests {
"###); "###);
insta::assert_snapshot!(p("'OR'"), @r###" insta::assert_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR' 1:5 'OR'
"###); "###);
@ -761,12 +772,12 @@ pub mod tests {
"###); "###);
insta::assert_snapshot!(p("channel Ponce"), @r###" insta::assert_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce 1:14 channel Ponce
"###); "###);
insta::assert_snapshot!(p("channel = Ponce OR"), @r###" insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR 19:19 channel = Ponce OR
"###); "###);
@ -851,12 +862,12 @@ pub mod tests {
"###); "###);
insta::assert_snapshot!(p("colour NOT EXIST"), @r###" insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST 1:17 colour NOT EXIST
"###); "###);
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###" insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000 1:23 subscribers 100 TO1000
"###); "###);
@ -919,35 +930,35 @@ pub mod tests {
"###); "###);
insta::assert_snapshot!(p(r#"value NULL"#), @r###" insta::assert_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL 1:11 value NULL
"###); "###);
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###" insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL 1:15 value NOT NULL
"###); "###);
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###" insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY 1:12 value EMPTY
"###); "###);
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###" insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY 1:16 value NOT EMPTY
"###); "###);
insta::assert_snapshot!(p(r#"value IS"#), @r###" insta::assert_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS 1:9 value IS
"###); "###);
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###" insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT 1:13 value IS NOT
"###); "###);
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###" insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS 1:16 value IS EXISTS
"###); "###);
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###" insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS 1:20 value IS NOT EXISTS
"###); "###);
} }

View File

@ -212,6 +212,8 @@ fn is_keyword(s: &str) -> bool {
| "NULL" | "NULL"
| "EMPTY" | "EMPTY"
| "CONTAINS" | "CONTAINS"
| "STARTS"
| "WITH"
| "_geoRadius" | "_geoRadius"
| "_geoBoundingBox" | "_geoBoundingBox"
) )

View File

@ -40,7 +40,7 @@ ureq = "2.10.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] } uuid = { version = "1.10.0", features = ["serde", "v4"] }
[dev-dependencies] [dev-dependencies]
arroy = "0.4.0" arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
big_s = "1.0.2" big_s = "1.0.2"
crossbeam = "0.8.4" crossbeam = "0.8.4"
insta = { version = "1.39.0", features = ["json", "redactions"] } insta = { version = "1.39.0", features = ["json", "redactions"] }

View File

@ -87,7 +87,7 @@ impl RoFeatures {
Ok(()) Ok(())
} else { } else {
Err(FeatureNotEnabledError { Err(FeatureNotEnabledError {
disabled_action: "Using `CONTAINS` in a filter", disabled_action: "Using `CONTAINS` or `STARTS WITH` in a filter",
feature: "contains filter", feature: "contains filter",
issue_link: "https://github.com/orgs/meilisearch/discussions/763", issue_link: "https://github.com/orgs/meilisearch/discussions/763",
} }

View File

@ -1477,7 +1477,7 @@ impl IndexScheduler {
.map( .map(
|IndexEmbeddingConfig { |IndexEmbeddingConfig {
name, name,
config: milli::vector::EmbeddingConfig { embedder_options, prompt }, config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized },
.. ..
}| { }| {
let prompt = let prompt =
@ -1486,7 +1486,10 @@ impl IndexScheduler {
{ {
let embedders = self.embedders.read().unwrap(); let embedders = self.embedders.read().unwrap();
if let Some(embedder) = embedders.get(&embedder_options) { if let Some(embedder) = embedders.get(&embedder_options) {
return Ok((name, (embedder.clone(), prompt))); return Ok((
name,
(embedder.clone(), prompt, quantized.unwrap_or_default()),
));
} }
} }
@ -1500,7 +1503,7 @@ impl IndexScheduler {
let mut embedders = self.embedders.write().unwrap(); let mut embedders = self.embedders.write().unwrap();
embedders.insert(embedder_options, embedder.clone()); embedders.insert(embedder_options, embedder.clone());
} }
Ok((name, (embedder, prompt))) Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
}, },
) )
.collect(); .collect();
@ -5197,7 +5200,7 @@ mod tests {
let simple_hf_name = name.clone(); let simple_hf_name = name.clone();
let configs = index_scheduler.embedders(configs).unwrap(); let configs = index_scheduler.embedders(configs).unwrap();
let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap(); let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap(); let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap();
let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap(); let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap();
let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap(); let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap();
@ -5519,6 +5522,7 @@ mod tests {
400, 400,
), ),
}, },
quantized: None,
}, },
user_provided: RoaringBitmap<[1, 2]>, user_provided: RoaringBitmap<[1, 2]>,
}, },
@ -5531,28 +5535,8 @@ mod tests {
// the document with the id 3 should keep its original embedding // the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap(); let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let mut embeddings = Vec::new(); let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embeddings = &embeddings["my_doggo_embedder"];
'vectors: for i in 0..=u8::MAX {
let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e),
})
.transpose();
let Some(reader) = reader else {
break 'vectors;
};
let embedding = reader.unwrap().item_vector(&rtxn, docid).unwrap();
if let Some(embedding) = embedding {
embeddings.push(embedding)
} else {
break 'vectors;
}
}
snapshot!(embeddings.len(), @"1"); snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]); assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@ -5737,6 +5721,7 @@ mod tests {
400, 400,
), ),
}, },
quantized: None,
}, },
user_provided: RoaringBitmap<[0]>, user_provided: RoaringBitmap<[0]>,
}, },
@ -5780,6 +5765,7 @@ mod tests {
400, 400,
), ),
}, },
quantized: None,
}, },
user_provided: RoaringBitmap<[]>, user_provided: RoaringBitmap<[]>,
}, },

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} 
best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} 
best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} 
best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} 
best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), 
dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best 
doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [0,] enqueued [0,]

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} 
best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [] enqueued []

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, 
status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, 
allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [0,] enqueued [0,]

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[] []
---------------------------------------------------------------------- ----------------------------------------------------------------------
### All Tasks: ### All Tasks:
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }} 0 {uid: 0, 
status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, 
allow_index_creation: true }}
---------------------------------------------------------------------- ----------------------------------------------------------------------
### Status: ### Status:
enqueued [] enqueued []

View File

@ -66,3 +66,5 @@ khmer = ["milli/khmer"]
vietnamese = ["milli/vietnamese"] vietnamese = ["milli/vietnamese"]
# force swedish character recomposition # force swedish character recomposition
swedish-recomposition = ["milli/swedish-recomposition"] swedish-recomposition = ["milli/swedish-recomposition"]
# force german character recomposition
german = ["milli/german"]

View File

@ -238,8 +238,14 @@ InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ; InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ; InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ; InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacetsByIndex , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFacetOrder , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederated , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchFederationOptions , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchMaxValuesPerFacet , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchMergeFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryFacets , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryPagination , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchQueryRankingRules , InvalidRequest , BAD_REQUEST ;
InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ; InvalidMultiSearchWeight , InvalidRequest , BAD_REQUEST ;
@ -389,7 +395,10 @@ impl ErrorCode for milli::Error {
| UserError::InvalidSettingsDimensions { .. } | UserError::InvalidSettingsDimensions { .. }
| UserError::InvalidUrl { .. } | UserError::InvalidUrl { .. }
| UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders, | UserError::InvalidPrompt(_)
| UserError::InvalidDisableBinaryQuantization { .. } => {
Code::InvalidSettingsEmbedders
}
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,

View File

@ -1,3 +1,4 @@
use std::borrow::Borrow;
use std::error::Error; use std::error::Error;
use std::fmt; use std::fmt;
use std::str::FromStr; use std::str::FromStr;
@ -8,7 +9,7 @@ use crate::error::{Code, ErrorCode};
/// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400
/// bytes long /// bytes long
#[derive(Debug, Clone, PartialEq, Eq, Deserr)] #[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)]
#[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)]
pub struct IndexUid(String); pub struct IndexUid(String);
@ -70,6 +71,12 @@ impl From<IndexUid> for String {
} }
} }
impl Borrow<String> for IndexUid {
fn borrow(&self) -> &String {
&self.0
}
}
#[derive(Debug)] #[derive(Debug)]
pub struct IndexUidFormatError { pub struct IndexUidFormatError {
pub invalid_uid: String, pub invalid_uid: String,

View File

@ -39,12 +39,14 @@ macro_rules! make_locale {
pub enum Locale { pub enum Locale {
$($iso_639_1,)+ $($iso_639_1,)+
$($iso_639_3,)+ $($iso_639_3,)+
Cmn,
} }
impl From<milli::tokenizer::Language> for Locale { impl From<milli::tokenizer::Language> for Locale {
fn from(other: milli::tokenizer::Language) -> Locale { fn from(other: milli::tokenizer::Language) -> Locale {
match other { match other {
$(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+ $(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+
milli::tokenizer::Language::Cmn => Locale::Cmn,
} }
} }
} }
@ -54,6 +56,7 @@ macro_rules! make_locale {
match other { match other {
$(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+ $(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+
$(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+ $(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+
Locale::Cmn => milli::tokenizer::Language::Cmn,
} }
} }
} }
@ -65,6 +68,7 @@ macro_rules! make_locale {
let locale = match s { let locale = match s {
$($iso_639_1_str => Locale::$iso_639_1,)+ $($iso_639_1_str => Locale::$iso_639_1,)+
$($iso_639_3_str => Locale::$iso_639_3,)+ $($iso_639_3_str => Locale::$iso_639_3,)+
"cmn" => Locale::Cmn,
_ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }),
}; };
@ -79,8 +83,9 @@ macro_rules! make_locale {
impl std::fmt::Display for LocaleFormatError { impl std::fmt::Display for LocaleFormatError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+].join(", "); let mut valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+,"cmn"];
write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) valid_locales.sort_by(|left, right| left.len().cmp(&right.len()).then(left.cmp(right)));
write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales.join(", "))
} }
} }
@ -99,7 +104,6 @@ make_locale!(
(Bg, "bg") => (Bul, "bul"), (Bg, "bg") => (Bul, "bul"),
(Ca, "ca") => (Cat, "cat"), (Ca, "ca") => (Cat, "cat"),
(Cs, "cs") => (Ces, "ces"), (Cs, "cs") => (Ces, "ces"),
(Zh, "zh") => (Cmn, "cmn"),
(Da, "da") => (Dan, "dan"), (Da, "da") => (Dan, "dan"),
(De, "de") => (Deu, "deu"), (De, "de") => (Deu, "deu"),
(El, "el") => (Ell, "ell"), (El, "el") => (Ell, "ell"),
@ -157,5 +161,6 @@ make_locale!(
(Uz, "uz") => (Uzb, "uzb"), (Uz, "uz") => (Uzb, "uzb"),
(Vi, "vi") => (Vie, "vie"), (Vi, "vi") => (Vie, "vie"),
(Yi, "yi") => (Yid, "yid"), (Yi, "yi") => (Yid, "yid"),
(Zh, "zh") => (Zho, "zho"),
(Zu, "zu") => (Zul, "zul"), (Zu, "zu") => (Zul, "zul"),
); );

View File

@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"] khmer = ["meilisearch-types/khmer"]
vietnamese = ["meilisearch-types/vietnamese"] vietnamese = ["meilisearch-types/vietnamese"]
swedish-recomposition = ["meilisearch-types/swedish-recomposition"] swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
german = ["meilisearch-types/german"]
[package.metadata.mini-dashboard] [package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"

View File

@ -646,8 +646,6 @@ pub struct SearchAggregator {
max_vector_size: usize, max_vector_size: usize,
// Whether the semantic ratio passed to a hybrid search equals the default ratio. // Whether the semantic ratio passed to a hybrid search equals the default ratio.
semantic_ratio: bool, semantic_ratio: bool,
// Whether a non-default embedder was specified
embedder: bool,
hybrid: bool, hybrid: bool,
retrieve_vectors: bool, retrieve_vectors: bool,
@ -795,7 +793,6 @@ impl SearchAggregator {
if let Some(hybrid) = hybrid { if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO(); ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
ret.embedder = hybrid.embedder.is_some();
ret.hybrid = true; ret.hybrid = true;
} }
@ -863,7 +860,6 @@ impl SearchAggregator {
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
semantic_ratio, semantic_ratio,
embedder,
hybrid, hybrid,
total_degraded, total_degraded,
total_used_negative_operator, total_used_negative_operator,
@ -923,7 +919,6 @@ impl SearchAggregator {
self.retrieve_vectors |= retrieve_vectors; self.retrieve_vectors |= retrieve_vectors;
self.semantic_ratio |= semantic_ratio; self.semantic_ratio |= semantic_ratio;
self.hybrid |= hybrid; self.hybrid |= hybrid;
self.embedder |= embedder;
// pagination // pagination
self.max_limit = self.max_limit.max(max_limit); self.max_limit = self.max_limit.max(max_limit);
@ -999,7 +994,6 @@ impl SearchAggregator {
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
semantic_ratio, semantic_ratio,
embedder,
hybrid, hybrid,
total_degraded, total_degraded,
total_used_negative_operator, total_used_negative_operator,
@ -1051,7 +1045,6 @@ impl SearchAggregator {
"hybrid": { "hybrid": {
"enabled": hybrid, "enabled": hybrid,
"semantic_ratio": semantic_ratio, "semantic_ratio": semantic_ratio,
"embedder": embedder,
}, },
"pagination": { "pagination": {
"max_limit": max_limit, "max_limit": max_limit,
@ -1782,7 +1775,6 @@ pub struct SimilarAggregator {
used_syntax: HashMap<String, usize>, used_syntax: HashMap<String, usize>,
// Whether a non-default embedder was specified // Whether a non-default embedder was specified
embedder: bool,
retrieve_vectors: bool, retrieve_vectors: bool,
// pagination // pagination
@ -1803,7 +1795,7 @@ impl SimilarAggregator {
pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self { pub fn from_query(query: &SimilarQuery, request: &HttpRequest) -> Self {
let SimilarQuery { let SimilarQuery {
id: _, id: _,
embedder, embedder: _,
offset, offset,
limit, limit,
attributes_to_retrieve: _, attributes_to_retrieve: _,
@ -1851,7 +1843,6 @@ impl SimilarAggregator {
ret.show_ranking_score_details = *show_ranking_score_details; ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some(); ret.ranking_score_threshold = ranking_score_threshold.is_some();
ret.embedder = embedder.is_some();
ret.retrieve_vectors = *retrieve_vectors; ret.retrieve_vectors = *retrieve_vectors;
ret ret
@ -1883,7 +1874,6 @@ impl SimilarAggregator {
max_attributes_to_retrieve, max_attributes_to_retrieve,
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
embedder,
ranking_score_threshold, ranking_score_threshold,
retrieve_vectors, retrieve_vectors,
} = other; } = other;
@ -1914,7 +1904,6 @@ impl SimilarAggregator {
*used_syntax = used_syntax.saturating_add(value); *used_syntax = used_syntax.saturating_add(value);
} }
self.embedder |= embedder;
self.retrieve_vectors |= retrieve_vectors; self.retrieve_vectors |= retrieve_vectors;
// pagination // pagination
@ -1948,7 +1937,6 @@ impl SimilarAggregator {
max_attributes_to_retrieve, max_attributes_to_retrieve,
show_ranking_score, show_ranking_score,
show_ranking_score_details, show_ranking_score_details,
embedder,
ranking_score_threshold, ranking_score_threshold,
retrieve_vectors, retrieve_vectors,
} = self; } = self;
@ -1980,9 +1968,6 @@ impl SimilarAggregator {
"vector": { "vector": {
"retrieve_vectors": retrieve_vectors, "retrieve_vectors": retrieve_vectors,
}, },
"hybrid": {
"embedder": embedder,
},
"pagination": { "pagination": {
"max_limit": max_limit, "max_limit": max_limit,
"max_offset": max_offset, "max_offset": max_offset,

View File

@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType};
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError}; use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
use meilisearch_types::milli::OrderBy;
use serde_json::Value; use serde_json::Value;
use tokio::task::JoinError; use tokio::task::JoinError;
@ -27,10 +28,20 @@ pub enum MeilisearchHttpError {
EmptyFilter, EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value), InvalidExpression(&'static [&'static str], Value),
#[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")] #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")]
FederationOptionsInNonFederatedRequest(usize), FederationOptionsInNonFederatedRequest(usize),
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")] #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")]
PaginationInFederatedQuery(usize, &'static str), PaginationInFederatedQuery(usize, &'static str),
#[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")]
FacetsInFederatedQuery(usize, String, Vec<String>),
#[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")]
InconsistentFacetOrder {
facet: String,
previous_facet_order: OrderBy,
previous_uid: String,
index_facet_order: OrderBy,
current_uid: String,
},
#[error("A {0} payload is missing.")] #[error("A {0} payload is missing.")]
MissingPayload(PayloadType), MissingPayload(PayloadType),
#[error("Too many search requests running at the same time: {0}. Retry after 10s.")] #[error("Too many search requests running at the same time: {0}. Retry after 10s.")]
@ -61,7 +72,7 @@ pub enum MeilisearchHttpError {
DocumentFormat(#[from] DocumentFormatError), DocumentFormat(#[from] DocumentFormatError),
#[error(transparent)] #[error(transparent)]
Join(#[from] JoinError), Join(#[from] JoinError),
#[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")] #[error("Invalid request: missing `hybrid` parameter when `vector` is present.")]
MissingSearchHybrid, MissingSearchHybrid,
} }
@ -96,6 +107,10 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { MeilisearchHttpError::PaginationInFederatedQuery(_, _) => {
Code::InvalidMultiSearchQueryPagination Code::InvalidMultiSearchQueryPagination
} }
MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets,
MeilisearchHttpError::InconsistentFacetOrder { .. } => {
Code::InvalidMultiSearchFacetOrder
}
} }
} }
} }

View File

@ -128,8 +128,10 @@ impl std::ops::Deref for SemanticRatioGet {
} }
} }
impl From<SearchQueryGet> for SearchQuery { impl TryFrom<SearchQueryGet> for SearchQuery {
fn from(other: SearchQueryGet) -> Self { type Error = ResponseError;
fn try_from(other: SearchQueryGet) -> Result<Self, Self::Error> {
let filter = match other.filter { let filter = match other.filter {
Some(f) => match serde_json::from_str(&f) { Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v), Ok(v) => Some(v),
@ -140,19 +142,28 @@ impl From<SearchQueryGet> for SearchQuery {
let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) { let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
(None, None) => None, (None, None) => None,
(None, Some(semantic_ratio)) => { (None, Some(_)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None }) return Err(ResponseError::from_msg(
"`hybridEmbedder` is mandatory when `hybridSemanticRatio` is present".into(),
meilisearch_types::error::Code::InvalidHybridQuery,
));
}
(Some(embedder), None) => {
Some(HybridQuery { semantic_ratio: DEFAULT_SEMANTIC_RATIO(), embedder })
} }
(Some(embedder), None) => Some(HybridQuery {
semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
embedder: Some(embedder),
}),
(Some(embedder), Some(semantic_ratio)) => { (Some(embedder), Some(semantic_ratio)) => {
Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) }) Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder })
} }
}; };
Self { if other.vector.is_some() && hybrid.is_none() {
return Err(ResponseError::from_msg(
"`hybridEmbedder` is mandatory when `vector` is present".into(),
meilisearch_types::error::Code::MissingSearchHybrid,
));
}
Ok(Self {
q: other.q, q: other.q,
vector: other.vector.map(CS::into_inner), vector: other.vector.map(CS::into_inner),
offset: other.offset.0, offset: other.offset.0,
@ -179,7 +190,7 @@ impl From<SearchQueryGet> for SearchQuery {
hybrid, hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
locales: other.locales.map(|o| o.into_iter().collect()), locales: other.locales.map(|o| o.into_iter().collect()),
} })
} }
} }
@ -219,7 +230,7 @@ pub async fn search_with_url_query(
debug!(parameters = ?params, "Search get"); debug!(parameters = ?params, "Search get");
let index_uid = IndexUid::try_from(index_uid.into_inner())?; let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let mut query: SearchQuery = params.into_inner().into(); let mut query: SearchQuery = params.into_inner().try_into()?;
// Tenant token search_rules. // Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) { if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
@ -312,44 +323,36 @@ pub fn search_kind(
features.check_vector("Passing `hybrid` as a parameter")?; features.check_vector("Passing `hybrid` as a parameter")?;
} }
// regardless of anything, always do a keyword search when we don't have a vector and the query is whitespace or missing // handle with care, the order of cases matters, the semantics is subtle
if query.vector.is_none() { match (query.q.as_deref(), &query.hybrid, query.vector.as_deref()) {
match &query.q { // empty query, no vector => placeholder search
Some(q) if q.trim().is_empty() => return Ok(SearchKind::KeywordOnly), (Some(q), _, None) if q.trim().is_empty() => Ok(SearchKind::KeywordOnly),
None => return Ok(SearchKind::KeywordOnly), // no query, no vector => placeholder search
_ => {} (None, _, None) => Ok(SearchKind::KeywordOnly),
// hybrid.semantic_ratio == 1.0 => vector
(_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => {
SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len()))
} }
} // hybrid.semantic_ratio == 0.0 => keyword
(_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => {
match &query.hybrid {
Some(HybridQuery { semantic_ratio, embedder }) if **semantic_ratio == 1.0 => {
Ok(SearchKind::semantic(
index_scheduler,
index,
embedder.as_deref(),
query.vector.as_ref().map(Vec::len),
)?)
}
Some(HybridQuery { semantic_ratio, embedder: _ }) if **semantic_ratio == 0.0 => {
Ok(SearchKind::KeywordOnly) Ok(SearchKind::KeywordOnly)
} }
Some(HybridQuery { semantic_ratio, embedder }) => Ok(SearchKind::hybrid( // no query, hybrid, vector => semantic
(None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => {
SearchKind::semantic(index_scheduler, index, embedder, Some(v.len()))
}
// query, no hybrid, no vector => keyword
(Some(_), None, None) => Ok(SearchKind::KeywordOnly),
// query, hybrid, maybe vector => hybrid
(Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid(
index_scheduler, index_scheduler,
index, index,
embedder.as_deref(), embedder,
**semantic_ratio, **semantic_ratio,
query.vector.as_ref().map(Vec::len), v.map(|v| v.len()),
)?), ),
None => match (query.q.as_deref(), query.vector.as_deref()) {
(_query, None) => Ok(SearchKind::KeywordOnly), (_, None, Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
(None, Some(_vector)) => Ok(SearchKind::semantic(
index_scheduler,
index,
None,
query.vector.as_ref().map(Vec::len),
)?),
(Some(_), Some(_)) => Err(MeilisearchHttpError::MissingSearchHybrid.into()),
},
} }
} }

View File

@ -643,12 +643,19 @@ fn embedder_analytics(
.max() .max()
}); });
let binary_quantization_used = setting.as_ref().map(|map| {
map.values()
.filter_map(|config| config.clone().set())
.any(|config| config.binary_quantized.set().is_some())
});
json!( json!(
{ {
"total": setting.as_ref().map(|s| s.len()), "total": setting.as_ref().map(|s| s.len()),
"sources": sources, "sources": sources,
"document_template_used": document_template_used, "document_template_used": document_template_used,
"document_template_max_bytes": document_template_max_bytes "document_template_max_bytes": document_template_max_bytes,
"binary_quantization_used": binary_quantization_used,
} }
) )
} }

View File

@ -102,8 +102,8 @@ async fn similar(
let index = index_scheduler.index(&index_uid)?; let index = index_scheduler.index(&index_uid)?;
let (embedder_name, embedder) = let (embedder_name, embedder, quantized) =
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?;
tokio::task::spawn_blocking(move || { tokio::task::spawn_blocking(move || {
perform_similar( perform_similar(
@ -111,6 +111,7 @@ async fn similar(
query, query,
embedder_name, embedder_name,
embedder, embedder,
quantized,
retrieve_vectors, retrieve_vectors,
index_scheduler.features(), index_scheduler.features(),
) )
@ -139,8 +140,8 @@ pub struct SimilarQueryGet {
show_ranking_score_details: Param<bool>, show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)] #[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>, pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)] #[deserr(error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: Option<String>, pub embedder: String,
} }
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]

View File

@ -9,20 +9,24 @@ use std::vec::{IntoIter, Vec};
use actix_http::StatusCode; use actix_http::StatusCode;
use index_scheduler::{IndexScheduler, RoFeatures}; use index_scheduler::{IndexScheduler, RoFeatures};
use indexmap::IndexMap;
use meilisearch_types::deserr::DeserrJsonError; use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::{ use meilisearch_types::error::deserr_codes::{
InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet,
InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit,
InvalidSearchOffset,
}; };
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
use meilisearch_types::milli::{self, DocumentId, TimeBudget}; use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use serde::Serialize; use serde::Serialize;
use super::ranking_rules::{self, RankingRules}; use super::ranking_rules::{self, RankingRules};
use super::{ use super::{
prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats,
SearchQuery, SearchQueryWithIndex, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex,
}; };
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
use crate::routes::indexes::search::search_kind; use crate::routes::indexes::search::search_kind;
@ -73,6 +77,17 @@ pub struct Federation {
pub limit: usize, pub limit: usize,
#[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize, pub offset: usize,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)]
pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>,
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)]
pub merge_facets: Option<MergeFacets>,
}
#[derive(Copy, Clone, Debug, deserr::Deserr, Default)]
#[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)]
pub struct MergeFacets {
#[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)]
pub max_values_per_facet: Option<usize>,
} }
#[derive(Debug, deserr::Deserr)] #[derive(Debug, deserr::Deserr)]
@ -82,7 +97,7 @@ pub struct FederatedSearch {
#[deserr(default)] #[deserr(default)]
pub federation: Option<Federation>, pub federation: Option<Federation>,
} }
#[derive(Serialize, Clone, PartialEq)] #[derive(Serialize, Clone)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct FederatedSearchResult { pub struct FederatedSearchResult {
pub hits: Vec<SearchHit>, pub hits: Vec<SearchHit>,
@ -93,6 +108,13 @@ pub struct FederatedSearchResult {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub semantic_hit_count: Option<u32>, pub semantic_hit_count: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
#[serde(skip_serializing_if = "FederatedFacets::is_empty")]
pub facets_by_index: FederatedFacets,
// These fields are only used for analytics purposes // These fields are only used for analytics purposes
#[serde(skip)] #[serde(skip)]
pub degraded: bool, pub degraded: bool,
@ -109,6 +131,9 @@ impl fmt::Debug for FederatedSearchResult {
semantic_hit_count, semantic_hit_count,
degraded, degraded,
used_negative_operator, used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
} = self; } = self;
let mut debug = f.debug_struct("SearchResult"); let mut debug = f.debug_struct("SearchResult");
@ -122,9 +147,18 @@ impl fmt::Debug for FederatedSearchResult {
if *degraded { if *degraded {
debug.field("degraded", degraded); debug.field("degraded", degraded);
} }
if let Some(facet_distribution) = facet_distribution {
debug.field("facet_distribution", &facet_distribution);
}
if let Some(facet_stats) = facet_stats {
debug.field("facet_stats", &facet_stats);
}
if let Some(semantic_hit_count) = semantic_hit_count { if let Some(semantic_hit_count) = semantic_hit_count {
debug.field("semantic_hit_count", &semantic_hit_count); debug.field("semantic_hit_count", &semantic_hit_count);
} }
if !facets_by_index.is_empty() {
debug.field("facets_by_index", &facets_by_index);
}
debug.finish() debug.finish()
} }
@ -313,16 +347,104 @@ struct SearchHitByIndex {
} }
struct SearchResultByIndex { struct SearchResultByIndex {
index: String,
hits: Vec<SearchHitByIndex>, hits: Vec<SearchHitByIndex>,
candidates: RoaringBitmap, estimated_total_hits: usize,
degraded: bool, degraded: bool,
used_negative_operator: bool, used_negative_operator: bool,
facets: Option<ComputedFacets>,
}
#[derive(Debug, Clone, Default, Serialize)]
pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>);
impl FederatedFacets {
pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) {
if let Some(facets) = facets {
self.0.insert(index, facets);
}
}
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
pub fn merge(
self,
MergeFacets { max_values_per_facet }: MergeFacets,
facet_order: BTreeMap<String, (String, OrderBy)>,
) -> Option<ComputedFacets> {
if self.is_empty() {
return None;
}
let mut distribution: BTreeMap<String, _> = Default::default();
let mut stats: BTreeMap<String, FacetStats> = Default::default();
for facets_by_index in self.0.into_values() {
for (facet, index_distribution) in facets_by_index.distribution {
match distribution.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_distribution);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let distribution = entry.get_mut();
for (value, index_count) in index_distribution {
distribution
.entry(value)
.and_modify(|count| *count += index_count)
.or_insert(index_count);
}
}
}
}
for (facet, index_stats) in facets_by_index.stats {
match stats.entry(facet) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(index_stats);
}
std::collections::btree_map::Entry::Occupied(mut entry) => {
let stats = entry.get_mut();
stats.min = f64::min(stats.min, index_stats.min);
stats.max = f64::max(stats.max, index_stats.max);
}
}
}
}
// fixup order
for (facet, values) in &mut distribution {
let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default();
match order_by {
OrderBy::Lexicographic => {
values.sort_unstable_by(|left, _, right, _| left.cmp(right))
}
OrderBy::Count => {
values.sort_unstable_by(|_, left, _, right| {
left.cmp(right)
// biggest first
.reverse()
})
}
}
if let Some(max_values_per_facet) = max_values_per_facet {
values.truncate(max_values_per_facet)
};
}
Some(ComputedFacets { distribution, stats })
}
} }
pub fn perform_federated_search( pub fn perform_federated_search(
index_scheduler: &IndexScheduler, index_scheduler: &IndexScheduler,
queries: Vec<SearchQueryWithIndex>, queries: Vec<SearchQueryWithIndex>,
federation: Federation, mut federation: Federation,
features: RoFeatures, features: RoFeatures,
) -> Result<FederatedSearchResult, ResponseError> { ) -> Result<FederatedSearchResult, ResponseError> {
let before_search = std::time::Instant::now(); let before_search = std::time::Instant::now();
@ -342,6 +464,16 @@ pub fn perform_federated_search(
.into()); .into());
} }
if let Some(facets) = federated_query.has_facets() {
let facets = facets.to_owned();
return Err(MeilisearchHttpError::FacetsInFederatedQuery(
query_index,
federated_query.index_uid.into_inner(),
facets,
)
.into());
}
let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
@ -353,13 +485,24 @@ pub fn perform_federated_search(
// 2. perform queries, merge and make hits index by index // 2. perform queries, merge and make hits index by index
let required_hit_count = federation.limit + federation.offset; let required_hit_count = federation.limit + federation.offset;
// In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
// Then in step (3), we'll update its value if there is any semantic search // Then in step (3), we'll update its value if there is any semantic search
let mut semantic_hit_count = None; let mut semantic_hit_count = None;
let mut results_by_index = Vec::with_capacity(queries_by_index.len()); let mut results_by_index = Vec::with_capacity(queries_by_index.len());
let mut previous_query_data: Option<(RankingRules, usize, String)> = None; let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
// remember the order and name of first index for each facet when merging with index settings
// to detect if the order is inconsistent for a facet.
let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets
{
Some(MergeFacets { .. }) => Some(Default::default()),
_ => None,
};
for (index_uid, queries) in queries_by_index { for (index_uid, queries) in queries_by_index {
let first_query_index = queries.first().map(|query| query.query_index);
let index = match index_scheduler.index(&index_uid) { let index = match index_scheduler.index(&index_uid) {
Ok(index) => index, Ok(index) => index,
Err(err) => { Err(err) => {
@ -367,9 +510,8 @@ pub fn perform_federated_search(
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL. // here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST; err.code = StatusCode::BAD_REQUEST;
if let Some(query) = queries.first() { if let Some(query_index) = first_query_index {
err.message = err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message);
format!("Inside `.queries[{}]`: {}", query.query_index, err.message);
} }
return Err(err); return Err(err);
} }
@ -394,6 +536,23 @@ pub fn perform_federated_search(
let mut used_negative_operator = false; let mut used_negative_operator = false;
let mut candidates = RoaringBitmap::new(); let mut candidates = RoaringBitmap::new();
let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten();
// TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}{}",
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
return Err(error);
}
// 2.1. Compute all candidates for each query in the index // 2.1. Compute all candidates for each query in the index
let mut results_by_query = Vec::with_capacity(queries.len()); let mut results_by_query = Vec::with_capacity(queries.len());
@ -562,34 +721,116 @@ pub fn perform_federated_search(
.collect(); .collect();
let merged_result = merged_result?; let merged_result = merged_result?;
results_by_index.push(SearchResultByIndex {
hits: merged_result, let estimated_total_hits = candidates.len() as usize;
let facets = facets_by_index
.map(|facets_by_index| {
compute_facet_distribution_stats(
&facets_by_index,
&index,
&rtxn,
candidates, candidates,
super::Route::MultiSearch,
)
})
.transpose()
.map_err(|mut error| {
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}{}",
error.message,
if let Some(query_index) = first_query_index {
format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`")
} else {
Default::default()
}
);
error
})?;
results_by_index.push(SearchResultByIndex {
index: index_uid,
hits: merged_result,
estimated_total_hits,
degraded, degraded,
used_negative_operator, used_negative_operator,
facets,
}); });
} }
// bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index.
for (index_uid, facets) in federation.facets_by_index {
let index = match index_scheduler.index(&index_uid) {
Ok(index) => index,
Err(err) => {
let mut err = ResponseError::from(err);
// Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
// here the resource not found is not part of the URL.
err.code = StatusCode::BAD_REQUEST;
err.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries",
err.message
);
return Err(err);
}
};
// Important: this is the only transaction we'll use for this index during this federated search
let rtxn = index.read_txn()?;
if let Err(mut error) =
check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn)
{
error.message = format!(
"Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries",
);
return Err(error);
}
if let Some(facets) = facets {
if let Err(mut error) = compute_facet_distribution_stats(
&facets,
&index,
&rtxn,
Default::default(),
super::Route::MultiSearch,
) {
error.message =
format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message);
return Err(error);
}
}
}
// 3. merge hits and metadata across indexes // 3. merge hits and metadata across indexes
// 3.1 merge metadata // 3.1 merge metadata
let (estimated_total_hits, degraded, used_negative_operator) = { let (estimated_total_hits, degraded, used_negative_operator, facets) = {
let mut estimated_total_hits = 0; let mut estimated_total_hits = 0;
let mut degraded = false; let mut degraded = false;
let mut used_negative_operator = false; let mut used_negative_operator = false;
let mut facets: FederatedFacets = FederatedFacets::default();
for SearchResultByIndex { for SearchResultByIndex {
index,
hits: _, hits: _,
candidates, estimated_total_hits: estimated_total_hits_by_index,
facets: facets_by_index,
degraded: degraded_by_index, degraded: degraded_by_index,
used_negative_operator: used_negative_operator_by_index, used_negative_operator: used_negative_operator_by_index,
} in &results_by_index } in &mut results_by_index
{ {
estimated_total_hits += candidates.len() as usize; estimated_total_hits += *estimated_total_hits_by_index;
degraded |= *degraded_by_index; degraded |= *degraded_by_index;
used_negative_operator |= *used_negative_operator_by_index; used_negative_operator |= *used_negative_operator_by_index;
let facets_by_index = std::mem::take(facets_by_index);
let index = std::mem::take(index);
facets.insert(index, facets_by_index);
} }
(estimated_total_hits, degraded, used_negative_operator) (estimated_total_hits, degraded, used_negative_operator, facets)
}; };
// 3.2 merge hits // 3.2 merge hits
@ -606,6 +847,20 @@ pub fn perform_federated_search(
.map(|hit| hit.hit) .map(|hit| hit.hit)
.collect(); .collect();
let (facet_distribution, facet_stats, facets_by_index) =
match federation.merge_facets.zip(facet_order) {
Some((merge_facets, facet_order)) => {
let facets = facets.merge(merge_facets, facet_order);
let (facet_distribution, facet_stats) = facets
.map(|ComputedFacets { distribution, stats }| (distribution, stats))
.unzip();
(facet_distribution, facet_stats, FederatedFacets::default())
}
None => (None, None, facets),
};
let search_result = FederatedSearchResult { let search_result = FederatedSearchResult {
hits: merged_hits, hits: merged_hits,
processing_time_ms: before_search.elapsed().as_millis(), processing_time_ms: before_search.elapsed().as_millis(),
@ -617,7 +872,39 @@ pub fn perform_federated_search(
semantic_hit_count, semantic_hit_count,
degraded, degraded,
used_negative_operator, used_negative_operator,
facet_distribution,
facet_stats,
facets_by_index,
}; };
Ok(search_result) Ok(search_result)
} }
/// Checks that the facets requested for `current_index` are ordered consistently
/// with the indexes that were checked before it.
///
/// For every facet in `facets_by_index`, the order configured in `index` is looked up.
/// The first index to request a facet records its name and order in `facet_order`;
/// any later index requesting the same facet must use that same order.
///
/// The check is skipped entirely when either `facet_order` or `facets_by_index` is `None`.
///
/// # Errors
///
/// - Fails if the facet orders cannot be read from the index.
/// - Fails with `MeilisearchHttpError::InconsistentFacetOrder` when the order of a facet
///   in `index` differs from the order previously recorded for that facet.
fn check_facet_order(
    facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>,
    current_index: &str,
    facets_by_index: &Option<Vec<String>>,
    index: &milli::Index,
    rtxn: &milli::heed::RoTxn<'_>,
) -> Result<(), ResponseError> {
    // nothing to check unless orders are being tracked and the index requests facets
    let (recorded_orders, requested_facets) =
        match facet_order.as_mut().zip(facets_by_index.as_ref()) {
            Some(pair) => pair,
            None => return Ok(()),
        };

    let orders_in_index = index.sort_facet_values_by(rtxn)?;

    for facet in requested_facets {
        let index_facet_order = orders_in_index.get(facet);

        // the first index requesting a facet decides the order expected from the others
        let (previous_index, previous_facet_order) = recorded_orders
            .entry(facet.to_owned())
            .or_insert_with(|| (current_index.to_owned(), index_facet_order));

        if *previous_facet_order == index_facet_order {
            continue;
        }

        return Err(MeilisearchHttpError::InconsistentFacetOrder {
            facet: facet.clone(),
            previous_facet_order: *previous_facet_order,
            previous_uid: previous_index.clone(),
            current_uid: current_index.to_owned(),
            index_facet_order,
        }
        .into());
    }

    Ok(())
}

View File

@ -267,58 +267,54 @@ impl fmt::Debug for SearchQuery {
pub struct HybridQuery { pub struct HybridQuery {
#[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)] #[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
pub semantic_ratio: SemanticRatio, pub semantic_ratio: SemanticRatio,
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)] #[deserr(error = DeserrJsonError<InvalidEmbedder>)]
pub embedder: Option<String>, pub embedder: String,
} }
#[derive(Clone)] #[derive(Clone)]
pub enum SearchKind { pub enum SearchKind {
KeywordOnly, KeywordOnly,
SemanticOnly { embedder_name: String, embedder: Arc<Embedder> }, SemanticOnly { embedder_name: String, embedder: Arc<Embedder>, quantized: bool },
Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 }, Hybrid { embedder_name: String, embedder: Arc<Embedder>, quantized: bool, semantic_ratio: f32 },
} }
impl SearchKind { impl SearchKind {
pub(crate) fn semantic( pub(crate) fn semantic(
index_scheduler: &index_scheduler::IndexScheduler, index_scheduler: &index_scheduler::IndexScheduler,
index: &Index, index: &Index,
embedder_name: Option<&str>, embedder_name: &str,
vector_len: Option<usize>, vector_len: Option<usize>,
) -> Result<Self, ResponseError> { ) -> Result<Self, ResponseError> {
let (embedder_name, embedder) = let (embedder_name, embedder, quantized) =
Self::embedder(index_scheduler, index, embedder_name, vector_len)?; Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
Ok(Self::SemanticOnly { embedder_name, embedder }) Ok(Self::SemanticOnly { embedder_name, embedder, quantized })
} }
pub(crate) fn hybrid( pub(crate) fn hybrid(
index_scheduler: &index_scheduler::IndexScheduler, index_scheduler: &index_scheduler::IndexScheduler,
index: &Index, index: &Index,
embedder_name: Option<&str>, embedder_name: &str,
semantic_ratio: f32, semantic_ratio: f32,
vector_len: Option<usize>, vector_len: Option<usize>,
) -> Result<Self, ResponseError> { ) -> Result<Self, ResponseError> {
let (embedder_name, embedder) = let (embedder_name, embedder, quantized) =
Self::embedder(index_scheduler, index, embedder_name, vector_len)?; Self::embedder(index_scheduler, index, embedder_name, vector_len)?;
Ok(Self::Hybrid { embedder_name, embedder, semantic_ratio }) Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio })
} }
pub(crate) fn embedder( pub(crate) fn embedder(
index_scheduler: &index_scheduler::IndexScheduler, index_scheduler: &index_scheduler::IndexScheduler,
index: &Index, index: &Index,
embedder_name: Option<&str>, embedder_name: &str,
vector_len: Option<usize>, vector_len: Option<usize>,
) -> Result<(String, Arc<Embedder>), ResponseError> { ) -> Result<(String, Arc<Embedder>, bool), ResponseError> {
let embedder_configs = index.embedding_configs(&index.read_txn()?)?; let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?; let embedders = index_scheduler.embedders(embedder_configs)?;
let embedder_name = embedder_name.unwrap_or_else(|| embedders.get_default_embedder_name()); let (embedder, _, quantized) = embedders
.get(embedder_name)
let embedder = embedders.get(embedder_name);
let embedder = embedder
.ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned())) .ok_or(milli::UserError::InvalidEmbedder(embedder_name.to_owned()))
.map_err(milli::Error::from)? .map_err(milli::Error::from)?;
.0;
if let Some(vector_len) = vector_len { if let Some(vector_len) = vector_len {
if vector_len != embedder.dimensions() { if vector_len != embedder.dimensions() {
@ -332,7 +328,7 @@ impl SearchKind {
} }
} }
Ok((embedder_name.to_owned(), embedder)) Ok((embedder_name.to_owned(), embedder, quantized))
} }
} }
@ -441,9 +437,6 @@ pub struct SearchQueryWithIndex {
} }
impl SearchQueryWithIndex { impl SearchQueryWithIndex {
/// Returns `true` when this query carries federation options, i.e. when
/// `federation_options` is `Some`.
pub fn has_federation_options(&self) -> bool {
self.federation_options.is_some()
}
pub fn has_pagination(&self) -> Option<&'static str> { pub fn has_pagination(&self) -> Option<&'static str> {
if self.offset.is_some() { if self.offset.is_some() {
Some("offset") Some("offset")
@ -458,6 +451,10 @@ impl SearchQueryWithIndex {
} }
} }
/// Returns the facets requested by this query as a slice.
///
/// Yields `None` both when `facets` is unset and when it is set but empty, so a
/// `Some` result always contains at least one facet name.
pub fn has_facets(&self) -> Option<&[String]> {
    match self.facets.as_deref() {
        Some(requested) if !requested.is_empty() => Some(requested),
        _ => None,
    }
}
pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) { pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
let SearchQueryWithIndex { let SearchQueryWithIndex {
index_uid, index_uid,
@ -537,8 +534,8 @@ pub struct SimilarQuery {
pub limit: usize, pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)] #[deserr(default, error = DeserrJsonError<InvalidSimilarFilter>)]
pub filter: Option<Value>, pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)] #[deserr(error = DeserrJsonError<InvalidEmbedder>)]
pub embedder: Option<String>, pub embedder: String,
#[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)] #[deserr(default, error = DeserrJsonError<InvalidSimilarAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>, pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)] #[deserr(default, error = DeserrJsonError<InvalidSimilarRetrieveVectors>)]
@ -792,7 +789,7 @@ fn prepare_search<'t>(
search.query(q); search.query(q);
} }
} }
SearchKind::SemanticOnly { embedder_name, embedder } => { SearchKind::SemanticOnly { embedder_name, embedder, quantized } => {
let vector = match query.vector.clone() { let vector = match query.vector.clone() {
Some(vector) => vector, Some(vector) => vector,
None => { None => {
@ -806,14 +803,19 @@ fn prepare_search<'t>(
} }
}; };
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector)); search.semantic(embedder_name.clone(), embedder.clone(), *quantized, Some(vector));
} }
SearchKind::Hybrid { embedder_name, embedder, semantic_ratio: _ } => { SearchKind::Hybrid { embedder_name, embedder, quantized, semantic_ratio: _ } => {
if let Some(q) = &query.q { if let Some(q) = &query.q {
search.query(q); search.query(q);
} }
// will be embedded in hybrid search if necessary // will be embedded in hybrid search if necessary
search.semantic(embedder_name.clone(), embedder.clone(), query.vector.clone()); search.semantic(
embedder_name.clone(),
embedder.clone(),
*quantized,
query.vector.clone(),
);
} }
} }
@ -987,39 +989,13 @@ pub fn perform_search(
HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits } HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits }
}; };
let (facet_distribution, facet_stats) = match facets { let (facet_distribution, facet_stats) = facets
Some(ref fields) => { .map(move |facets| {
let mut facet_distribution = index.facets_distribution(&rtxn); compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search)
})
let max_values_by_facet = index .transpose()?
.max_values_per_facet(&rtxn) .map(|ComputedFacets { distribution, stats }| (distribution, stats))
.map_err(milli::Error::from)? .unzip();
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
let sort_facet_values_by =
index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?;
if fields.iter().all(|f| f != "*") {
let fields: Vec<_> =
fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect();
facet_distribution.facets(fields);
}
let distribution = facet_distribution
.candidates(candidates)
.default_order_by(sort_facet_values_by.get("*"))
.execute()?;
let stats = facet_distribution.compute_stats()?;
(Some(distribution), Some(stats))
}
None => (None, None),
};
let facet_stats = facet_stats.map(|stats| {
stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect()
});
let result = SearchResult { let result = SearchResult {
hits: documents, hits: documents,
@ -1035,6 +1011,61 @@ pub fn perform_search(
Ok(result) Ok(result)
} }
/// Facet results returned by `compute_facet_distribution_stats`.
#[derive(Debug, Clone, Default, Serialize)]
pub struct ComputedFacets {
/// Output of executing the facet distribution.
/// NOTE(review): presumably facet name -> (facet value -> number of matching documents) — confirm.
pub distribution: BTreeMap<String, IndexMap<String, u64>>,
/// One `FacetStats { min, max }` entry per key returned by the distribution's `compute_stats`.
pub stats: BTreeMap<String, FacetStats>,
}
/// Tells `compute_facet_distribution_stats` which route asked for the facets.
///
/// It only influences error reporting: see the variants below.
enum Route {
/// Errors from executing the distribution are converted with the default `into()` conversion.
Search,
/// An `InvalidFacetsDistribution` user error is reported with
/// `Code::InvalidMultiSearchFacets`; any other error uses the default conversion.
MultiSearch,
}
fn compute_facet_distribution_stats<S: AsRef<str>>(
facets: &[S],
index: &Index,
rtxn: &RoTxn,
candidates: roaring::RoaringBitmap,
route: Route,
) -> Result<ComputedFacets, ResponseError> {
let mut facet_distribution = index.facets_distribution(rtxn);
let max_values_by_facet = index
.max_values_per_facet(rtxn)
.map_err(milli::Error::from)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?;
// add specific facet if there is no placeholder
if facets.iter().all(|f| f.as_ref() != "*") {
let fields: Vec<_> =
facets.iter().map(|n| (n, sort_facet_values_by.get(n.as_ref()))).collect();
facet_distribution.facets(fields);
}
let distribution = facet_distribution
.candidates(candidates)
.default_order_by(sort_facet_values_by.get("*"))
.execute()
.map_err(|error| match (error, route) {
(
error @ milli::Error::UserError(milli::UserError::InvalidFacetsDistribution {
..
}),
Route::MultiSearch,
) => ResponseError::from_msg(error.to_string(), Code::InvalidMultiSearchFacets),
(error, _) => error.into(),
})?;
let stats = facet_distribution.compute_stats()?;
let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect();
Ok(ComputedFacets { distribution, stats })
}
pub fn search_from_kind( pub fn search_from_kind(
search_kind: SearchKind, search_kind: SearchKind,
search: milli::Search<'_>, search: milli::Search<'_>,
@ -1413,6 +1444,7 @@ pub fn perform_similar(
query: SimilarQuery, query: SimilarQuery,
embedder_name: String, embedder_name: String,
embedder: Arc<Embedder>, embedder: Arc<Embedder>,
quantized: bool,
retrieve_vectors: RetrieveVectors, retrieve_vectors: RetrieveVectors,
features: RoFeatures, features: RoFeatures,
) -> Result<SimilarResult, ResponseError> { ) -> Result<SimilarResult, ResponseError> {
@ -1441,8 +1473,16 @@ pub fn perform_similar(
)); ));
}; };
let mut similar = let mut similar = milli::Similar::new(
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder); internal_id,
offset,
limit,
index,
&rtxn,
embedder_name,
embedder,
quantized,
);
if let Some(ref filter) = query.filter { if let Some(ref filter) = query.filter {
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? { if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {

View File

@ -136,7 +136,7 @@ async fn get_all_documents_bad_filter() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter", "code": "invalid_document_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter" "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -525,7 +525,7 @@ async fn delete_document_by_filter() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"code": "invalid_document_filter", "code": "invalid_document_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter" "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -723,7 +723,7 @@ async fn fetch_document_by_filter() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter", "code": "invalid_document_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter" "link": "https://docs.meilisearch.com/errors#invalid_document_filter"

View File

@ -646,7 +646,7 @@ async fn filter_invalid_syntax_object() {
.search(json!({"filter": "title & Glass"}), |response, code| { .search(json!({"filter": "title & Glass"}), |response, code| {
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter", "code": "invalid_search_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter" "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -669,7 +669,7 @@ async fn filter_invalid_syntax_array() {
.search(json!({"filter": ["title & Glass"]}), |response, code| { .search(json!({"filter": ["title & Glass"]}), |response, code| {
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter", "code": "invalid_search_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter" "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -1163,7 +1163,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled", "code": "feature_not_enabled",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled" "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@ -1176,7 +1176,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir", "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
"code": "feature_not_enabled", "code": "feature_not_enabled",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled" "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@ -1192,7 +1192,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled", "code": "feature_not_enabled",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled" "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
@ -1204,7 +1204,7 @@ async fn search_with_contains_without_enabling_the_feature() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir", "message": "Using `CONTAINS` or `STARTS WITH` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled", "code": "feature_not_enabled",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled" "link": "https://docs.meilisearch.com/errors#feature_not_enabled"

View File

@ -128,7 +128,7 @@ async fn simple_search() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -137,7 +137,7 @@ async fn simple_search() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5}, "showRankingScore": true, "retrieveVectors": true}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -146,7 +146,7 @@ async fn simple_search() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}, "showRankingScore": true, "retrieveVectors": true}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -161,7 +161,7 @@ async fn limit_offset() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -174,7 +174,7 @@ async fn limit_offset() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -188,8 +188,11 @@ async fn simple_search_hf() {
let server = Server::new().await; let server = Server::new().await;
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await; let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) = let (response, code) = index
index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await; .search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}),
)
.await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
snapshot!(response["semanticHitCount"], @"0"); snapshot!(response["semanticHitCount"], @"0");
@ -197,7 +200,7 @@ async fn simple_search_hf() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
// disable ranking score as the vectors between architectures are not equal // disable ranking score as the vectors between architectures are not equal
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}), json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -206,7 +209,7 @@ async fn simple_search_hf() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}), json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -215,7 +218,7 @@ async fn simple_search_hf() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -224,7 +227,7 @@ async fn simple_search_hf() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -237,7 +240,7 @@ async fn distribution_shift() {
let server = Server::new().await; let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true}); let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true});
let (response, code) = index.search_post(search.clone()).await; let (response, code) = index.search_post(search.clone()).await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###); snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
@ -271,7 +274,7 @@ async fn highlighter() {
let (response, code) = index let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.2}, "hybrid": {"embedder": "default", "semanticRatio": 0.2},
"retrieveVectors": true, "retrieveVectors": true,
"attributesToHighlight": [ "attributesToHighlight": [
"desc", "desc",
@ -287,7 +290,7 @@ async fn highlighter() {
let (response, code) = index let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 0.8}, "hybrid": {"embedder": "default", "semanticRatio": 0.8},
"retrieveVectors": true, "retrieveVectors": true,
"showRankingScore": true, "showRankingScore": true,
"attributesToHighlight": [ "attributesToHighlight": [
@ -304,7 +307,7 @@ async fn highlighter() {
// no highlighting on full semantic // no highlighting on full semantic
let (response, code) = index let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
"hybrid": {"semanticRatio": 1.0}, "hybrid": {"embedder": "default", "semanticRatio": 1.0},
"retrieveVectors": true, "retrieveVectors": true,
"showRankingScore": true, "showRankingScore": true,
"attributesToHighlight": [ "attributesToHighlight": [
@ -326,7 +329,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}),
) )
.await; .await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
@ -341,7 +344,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}), json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}),
) )
.await; .await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
@ -357,7 +360,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index let (response, code) = index
.search_get( .search_get(
&yaup::to_string( &yaup::to_string(
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}), &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}),
) )
.unwrap(), .unwrap(),
) )
@ -375,7 +378,7 @@ async fn invalid_semantic_ratio() {
let (response, code) = index let (response, code) = index
.search_get( .search_get(
&yaup::to_string( &yaup::to_string(
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}), &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}),
) )
.unwrap(), .unwrap(),
) )
@ -398,7 +401,7 @@ async fn single_document() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
) )
.await; .await;
@ -414,7 +417,7 @@ async fn query_combination() {
// search without query and vector, but with hybrid => still placeholder // search without query and vector, but with hybrid => still placeholder
let (response, code) = index let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -423,7 +426,7 @@ async fn query_combination() {
// same with a different semantic ratio // same with a different semantic ratio
let (response, code) = index let (response, code) = index
.search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -432,7 +435,7 @@ async fn query_combination() {
// wrong vector dimensions // wrong vector dimensions
let (response, code) = index let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.await; .await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
@ -447,7 +450,7 @@ async fn query_combination() {
// full vector // full vector
let (response, code) = index let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -456,7 +459,7 @@ async fn query_combination() {
// full keyword, without a query // full keyword, without a query
let (response, code) = index let (response, code) = index
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -465,7 +468,7 @@ async fn query_combination() {
// query + vector, full keyword => keyword // query + vector, full keyword => keyword
let (response, code) = index let (response, code) = index
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true})) .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -480,7 +483,7 @@ async fn query_combination() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.", "message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
"code": "missing_search_hybrid", "code": "missing_search_hybrid",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_search_hybrid" "link": "https://docs.meilisearch.com/errors#missing_search_hybrid"
@ -490,7 +493,7 @@ async fn query_combination() {
// full vector, without a vector => error // full vector, without a vector => error
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}), json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
) )
.await; .await;
@ -507,7 +510,7 @@ async fn query_combination() {
// hybrid without a vector => full keyword // hybrid without a vector => full keyword
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}), json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
) )
.await; .await;
@ -523,7 +526,7 @@ async fn retrieve_vectors() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -573,7 +576,7 @@ async fn retrieve_vectors() {
let (response, code) = index let (response, code) = index
.search_post( .search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true}), json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
) )
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");

View File

@ -922,7 +922,7 @@ async fn invalid_locales() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`",
"code": "invalid_search_locales", "code": "invalid_search_locales",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_locales" "link": "https://docs.meilisearch.com/errors#invalid_search_locales"
@ -935,7 +935,7 @@ async fn invalid_locales() {
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
"message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bn, bg, ca, cs, zh, da, de, el, en, eo, et, fi, fr, gu, he, hi, hr, hu, hy, id, it, jv, ja, kn, ka, km, ko, la, lv, lt, ml, mr, mk, my, ne, nl, nb, or, pa, fa, pl, pt, ro, ru, si, sk, sl, sn, es, sr, sv, ta, te, tl, th, tk, tr, uk, ur, uz, vi, yi, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zul", "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bg, bn, ca, cs, da, de, el, en, eo, es, et, fa, fi, fr, gu, he, hi, hr, hu, hy, id, it, ja, jv, ka, km, kn, ko, la, lt, lv, mk, ml, mr, my, nb, ne, nl, or, pa, pl, pt, ro, ru, si, sk, sl, sn, sr, sv, ta, te, th, tk, tl, tr, uk, ur, uz, vi, yi, zh, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zho, zul",
"code": "invalid_search_locales", "code": "invalid_search_locales",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_locales" "link": "https://docs.meilisearch.com/errors#invalid_search_locales"
@ -957,7 +957,7 @@ async fn invalid_localized_attributes_rules() {
.await; .await;
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zh`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `cmn`",
"code": "invalid_settings_localized_attributes", "code": "invalid_settings_localized_attributes",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes"
@ -1143,3 +1143,195 @@ async fn facet_search_with_localized_attributes() {
} }
"###); "###);
} }
/// Searches an index whose `product` attribute is localized as Swedish (`swe`).
///
/// The snapshots expect `trä` and `tra` to match disjoint documents
/// (`trä`/`träbjälke` versus `traktor`/`trafiksignal`), first when the query
/// carries no `locales` and then when the query explicitly requests `swe`.
#[actix_rt::test]
async fn swedish_search() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = json!([
        {"id": "tra1-1", "product": "trä"},
        {"id": "tra2-1", "product": "traktor"},
        {"id": "tra1-2", "product": "träbjälke"},
        {"id": "tra2-2", "product": "trafiksignal"},
    ]);
    index.add_documents(documents, None).await;
    let (response, _) = index
        .update_settings(json!({
            "searchableAttributes": ["product"],
            "localizedAttributes": [
                // force swedish
                {"attributePatterns": ["product"], "locales": ["swe"]}
            ]
        }))
        .await;
    // Wait on the task id returned by the settings update instead of assuming
    // it is task `1`, so the test keeps working if more tasks are enqueued
    // before this point.
    index.wait_task(response.uid()).await;

    // infer swedish: the queries below do not specify `locales`
    index
        .search(json!({"q": "trä", "attributesToRetrieve": ["product"]}), |response, code| {
            snapshot!(response, @r###"
{
"hits": [
{
"product": "trä"
},
{
"product": "träbjälke"
}
],
"query": "trä",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
}
"###);
            snapshot!(code, @"200 OK");
        })
        .await;
    index
        .search(json!({"q": "tra", "attributesToRetrieve": ["product"]}), |response, code| {
            snapshot!(response, @r###"
{
"hits": [
{
"product": "traktor"
},
{
"product": "trafiksignal"
}
],
"query": "tra",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
}
"###);
            snapshot!(code, @"200 OK");
        })
        .await;

    // force swedish: same queries, with `locales` set explicitly
    index
        .search(
            json!({"q": "trä", "locales": ["swe"], "attributesToRetrieve": ["product"]}),
            |response, code| {
                snapshot!(response, @r###"
{
"hits": [
{
"product": "trä"
},
{
"product": "träbjälke"
}
],
"query": "trä",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
}
"###);
                snapshot!(code, @"200 OK");
            },
        )
        .await;
    index
        .search(
            json!({"q": "tra", "locales": ["swe"], "attributesToRetrieve": ["product"]}),
            |response, code| {
                snapshot!(response, @r###"
{
"hits": [
{
"product": "traktor"
},
{
"product": "trafiksignal"
}
],
"query": "tra",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
}
"###);
                snapshot!(code, @"200 OK");
            },
        )
        .await;
}
/// Searches an index whose `product` attribute is localized as German (`deu`).
///
/// The snapshots expect a query for the trailing part of a compound word
/// (`kulturalität`, `organisation`) to return the document holding the full
/// compound (`Interkulturalität`, `Wissensorganisation`).
// NOTE(review): presumably this exercises German compound-word segmentation — confirm.
#[actix_rt::test]
async fn german_search() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = json!([
        {"id": 1, "product": "Interkulturalität"},
        {"id": 2, "product": "Wissensorganisation"},
    ]);
    index.add_documents(documents, None).await;
    let (response, _) = index
        .update_settings(json!({
            "searchableAttributes": ["product"],
            "localizedAttributes": [
                // force german
                {"attributePatterns": ["product"], "locales": ["deu"]}
            ]
        }))
        .await;
    // Wait on the task id returned by the settings update instead of assuming
    // it is task `1`, so the test keeps working if more tasks are enqueued
    // before this point.
    index.wait_task(response.uid()).await;

    // infer german: the queries below do not specify `locales`
    index
        .search(
            json!({"q": "kulturalität", "attributesToRetrieve": ["product"]}),
            |response, code| {
                snapshot!(response, @r###"
{
"hits": [
{
"product": "Interkulturalität"
}
],
"query": "kulturalität",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
"###);
                snapshot!(code, @"200 OK");
            },
        )
        .await;
    index
        .search(
            json!({"q": "organisation", "attributesToRetrieve": ["product"]}),
            |response, code| {
                snapshot!(response, @r###"
{
"hits": [
{
"product": "Wissensorganisation"
}
],
"query": "organisation",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
"###);
                snapshot!(code, @"200 OK");
            },
        )
        .await;
}

View File

@ -1099,11 +1099,17 @@ async fn experimental_feature_vector_store() {
index.add_documents(json!(documents), None).await; index.add_documents(json!(documents), None).await;
index.wait_task(0).await; index.wait_task(0).await;
index let (response, code) = index
.search(json!({ .search_post(json!({
"vector": [1.0, 2.0, 3.0], "vector": [1.0, 2.0, 3.0],
"hybrid": {
"embedder": "manual",
},
"showRankingScore": true "showRankingScore": true
}), |response, code|{ }))
.await;
{
meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###" meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{ {
@ -1113,8 +1119,8 @@ async fn experimental_feature_vector_store() {
"link": "https://docs.meilisearch.com/errors#feature_not_enabled" "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
} }
"###); "###);
}) }
.await;
index index
.search(json!({ .search(json!({
"retrieveVectors": true, "retrieveVectors": true,
@ -1162,6 +1168,9 @@ async fn experimental_feature_vector_store() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"vector": [1.0, 2.0, 3.0], "vector": [1.0, 2.0, 3.0],
"hybrid": {
"embedder": "manual",
},
"showRankingScore": true, "showRankingScore": true,
"retrieveVectors": true, "retrieveVectors": true,
})) }))

File diff suppressed because it is too large Load Diff

View File

@ -18,7 +18,7 @@ async fn similar_unexisting_index() {
}); });
index index
.similar(json!({"id": 287947}), |response, code| { .similar(json!({"id": 287947, "embedder": "manual"}), |response, code| {
assert_eq!(code, 404); assert_eq!(code, 404);
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
}) })
@ -44,7 +44,7 @@ async fn similar_feature_not_enabled() {
let server = Server::new().await; let server = Server::new().await;
let index = server.index("test"); let index = server.index("test");
let (response, code) = index.similar_post(json!({"id": 287947})).await; let (response, code) = index.similar_post(json!({"id": 287947, "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@ -199,7 +199,8 @@ async fn similar_not_found_id() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await; server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": "definitely-doesnt-exist"})).await; let (response, code) =
index.similar_post(json!({"id": "definitely-doesnt-exist", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@ -230,7 +231,8 @@ async fn similar_bad_offset() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await; server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": 287947, "offset": "doggo"})).await; let (response, code) =
index.similar_post(json!({"id": 287947, "offset": "doggo", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@ -241,7 +243,7 @@ async fn similar_bad_offset() {
} }
"###); "###);
let (response, code) = index.similar_get("?id=287947&offset=doggo").await; let (response, code) = index.similar_get("?id=287947&offset=doggo&embedder=manual").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@ -272,7 +274,8 @@ async fn similar_bad_limit() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
server.wait_task(response.uid()).await; server.wait_task(response.uid()).await;
let (response, code) = index.similar_post(json!({"id": 287947, "limit": "doggo"})).await; let (response, code) =
index.similar_post(json!({"id": 287947, "limit": "doggo", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@ -283,7 +286,7 @@ async fn similar_bad_limit() {
} }
"###); "###);
let (response, code) = index.similar_get("?id=287946&limit=doggo").await; let (response, code) = index.similar_get("?id=287946&limit=doggo&embedder=manual").await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@ -323,7 +326,8 @@ async fn similar_bad_filter() {
snapshot!(code, @"202 Accepted"); snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
let (response, code) = index.similar_post(json!({ "id": 287947, "filter": true })).await; let (response, code) =
index.similar_post(json!({ "id": 287947, "filter": true, "embedder": "manual" })).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@ -361,10 +365,10 @@ async fn filter_invalid_syntax_object() {
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
index index
.similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| { .similar(json!({"id": 287947, "filter": "title & Glass", "embedder": "manual"}), |response, code| {
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter", "code": "invalid_similar_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
@ -400,10 +404,10 @@ async fn filter_invalid_syntax_array() {
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
index index
.similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| { .similar(json!({"id": 287947, "filter": ["title & Glass"], "embedder": "manual"}), |response, code| {
snapshot!(response, @r###" snapshot!(response, @r###"
{ {
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter", "code": "invalid_similar_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
@ -446,7 +450,7 @@ async fn filter_invalid_syntax_string() {
}); });
index index
.similar( .similar(
json!({"id": 287947, "filter": "title = Glass XOR title = Glass"}), json!({"id": 287947, "filter": "title = Glass XOR title = Glass", "embedder": "manual"}),
|response, code| { |response, code| {
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
assert_eq!(code, 400); assert_eq!(code, 400);
@ -486,10 +490,13 @@ async fn filter_invalid_attribute_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}); });
index index
.similar(json!({"id": 287947, "filter": ["many = Glass"]}), |response, code| { .similar(
json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
assert_eq!(code, 400); assert_eq!(code, 400);
}) },
)
.await; .await;
} }
@ -524,10 +531,13 @@ async fn filter_invalid_attribute_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}); });
index index
.similar(json!({"id": 287947, "filter": "many = Glass"}), |response, code| { .similar(
json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
assert_eq!(code, 400); assert_eq!(code, 400);
}) },
)
.await; .await;
} }
@ -562,10 +572,13 @@ async fn filter_reserved_geo_attribute_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}); });
index index
.similar(json!({"id": 287947, "filter": ["_geo = Glass"]}), |response, code| { .similar(
json!({"id": 287947, "filter": ["_geo = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
assert_eq!(code, 400); assert_eq!(code, 400);
}) },
)
.await; .await;
} }
@ -600,10 +613,13 @@ async fn filter_reserved_geo_attribute_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}); });
index index
.similar(json!({"id": 287947, "filter": "_geo = Glass"}), |response, code| { .similar(
json!({"id": 287947, "filter": "_geo = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
assert_eq!(code, 400); assert_eq!(code, 400);
}) },
)
.await; .await;
} }
@ -638,10 +654,13 @@ async fn filter_reserved_attribute_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}); });
index index
.similar(json!({"id": 287947, "filter": ["_geoDistance = Glass"]}), |response, code| { .similar(
json!({"id": 287947, "filter": ["_geoDistance = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
assert_eq!(code, 400); assert_eq!(code, 400);
}) },
)
.await; .await;
} }
@ -676,10 +695,13 @@ async fn filter_reserved_attribute_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}); });
index index
.similar(json!({"id": 287947, "filter": "_geoDistance = Glass"}), |response, code| { .similar(
json!({"id": 287947, "filter": "_geoDistance = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
assert_eq!(code, 400); assert_eq!(code, 400);
}) },
)
.await; .await;
} }
@ -714,10 +736,13 @@ async fn filter_reserved_geo_point_array() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}); });
index index
.similar(json!({"id": 287947, "filter": ["_geoPoint = Glass"]}), |response, code| { .similar(
json!({"id": 287947, "filter": ["_geoPoint = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
assert_eq!(code, 400); assert_eq!(code, 400);
}) },
)
.await; .await;
} }
@ -752,10 +777,13 @@ async fn filter_reserved_geo_point_string() {
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter" "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}); });
index index
.similar(json!({"id": 287947, "filter": "_geoPoint = Glass"}), |response, code| { .similar(
json!({"id": 287947, "filter": "_geoPoint = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response); assert_eq!(response, expected_response);
assert_eq!(code, 400); assert_eq!(code, 400);
}) },
)
.await; .await;
} }
@ -765,7 +793,8 @@ async fn similar_bad_retrieve_vectors() {
server.set_features(json!({"vectorStore": true})).await; server.set_features(json!({"vectorStore": true})).await;
let index = server.index("test"); let index = server.index("test");
let (response, code) = index.similar_post(json!({"retrieveVectors": "doggo"})).await; let (response, code) =
index.similar_post(json!({"retrieveVectors": "doggo", "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {
@ -776,7 +805,8 @@ async fn similar_bad_retrieve_vectors() {
} }
"###); "###);
let (response, code) = index.similar_post(json!({"retrieveVectors": [true]})).await; let (response, code) =
index.similar_post(json!({"retrieveVectors": [true], "embedder": "manual"})).await;
snapshot!(code, @"400 Bad Request"); snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###" snapshot!(json_string!(response), @r###"
{ {

View File

@ -80,7 +80,9 @@ async fn basic() {
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
index index
.similar(json!({"id": 143, "retrieveVectors": true}), |response, code| { .similar(
json!({"id": 143, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###" snapshot!(json_string!(response["hits"]), @r###"
[ [
@ -154,11 +156,14 @@ async fn basic() {
} }
] ]
"###); "###);
}) },
)
.await; .await;
index index
.similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| { .similar(
json!({"id": "299537", "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###" snapshot!(json_string!(response["hits"]), @r###"
[ [
@ -232,7 +237,8 @@ async fn basic() {
} }
] ]
"###); "###);
}) },
)
.await; .await;
} }
@ -272,7 +278,7 @@ async fn ranking_score_threshold() {
index index
.similar( .similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true}), json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0, "retrieveVectors": true, "embedder": "manual"}),
|response, code| { |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"4");
@ -358,7 +364,7 @@ async fn ranking_score_threshold() {
index index
.similar( .similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true}), json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.2, "retrieveVectors": true, "embedder": "manual"}),
|response, code| { |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"3");
@ -426,7 +432,7 @@ async fn ranking_score_threshold() {
index index
.similar( .similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true}), json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.3, "retrieveVectors": true, "embedder": "manual"}),
|response, code| { |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"2");
@ -476,7 +482,7 @@ async fn ranking_score_threshold() {
index index
.similar( .similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true}), json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.6, "retrieveVectors": true, "embedder": "manual"}),
|response, code| { |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1"); meili_snap::snapshot!(meili_snap::json_string!(response["estimatedTotalHits"]), @"1");
@ -508,7 +514,7 @@ async fn ranking_score_threshold() {
index index
.similar( .similar(
json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true}), json!({"id": 143, "showRankingScore": true, "rankingScoreThreshold": 0.9, "retrieveVectors": true, "embedder": "manual"}),
|response, code| { |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @"[]"); snapshot!(json_string!(response["hits"]), @"[]");
@ -553,7 +559,7 @@ async fn filter() {
index index
.similar( .similar(
json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}), json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true, "embedder": "manual"}),
|response, code| { |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###" snapshot!(json_string!(response["hits"]), @r###"
@ -617,7 +623,7 @@ async fn filter() {
index index
.similar( .similar(
json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}), json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true, "embedder": "manual"}),
|response, code| { |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###" snapshot!(json_string!(response["hits"]), @r###"
@ -681,7 +687,9 @@ async fn limit_and_offset() {
index.wait_task(value.uid()).await; index.wait_task(value.uid()).await;
index index
.similar(json!({"id": 143, "limit": 1, "retrieveVectors": true}), |response, code| { .similar(
json!({"id": 143, "limit": 1, "retrieveVectors": true, "embedder": "manual"}),
|response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###" snapshot!(json_string!(response["hits"]), @r###"
[ [
@ -704,12 +712,13 @@ async fn limit_and_offset() {
} }
] ]
"###); "###);
}) },
)
.await; .await;
index index
.similar( .similar(
json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}), json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true, "embedder": "manual"}),
|response, code| { |response, code| {
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###" snapshot!(json_string!(response["hits"]), @r###"

View File

@ -0,0 +1,380 @@
use meili_snap::{json_string, snapshot};
use crate::common::{GetAllDocumentsOptions, Server};
use crate::json;
use crate::vector::generate_default_user_provided_documents;
/// Verifies how the `binaryQuantized` flag of a `userProvided` embedder is
/// echoed back by the settings route.
///
/// Three settings updates are applied in sequence to the `manual` embedder:
/// one without the flag, one with `false`, and one with `true`. After each
/// update the returned embedder settings are compared against an inline
/// snapshot; the flag is expected to be absent in the first case and present
/// with its given value in the other two.
#[actix_rt::test]
async fn retrieve_binary_quantize_status_in_the_settings() {
    let server = Server::new().await;
    let index = server.index("doggo");

    // The vector store is an experimental feature and is switched on first.
    let (features, status) = server.set_features(json!({"vectorStore": true})).await;
    snapshot!(status, @"200 OK");
    snapshot!(features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

    // 1. `binaryQuantized` left unspecified.
    let unspecified = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
    });
    let (task, status) = index.update_settings(unspecified).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await.succeeded();

    let (current, status) = index.settings().await;
    snapshot!(status, @"200 OK");
    snapshot!(current["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3}"###);

    // 2. `binaryQuantized` explicitly disabled.
    let disabled = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
                "binaryQuantized": false,
            }
        },
    });
    let (task, status) = index.update_settings(disabled).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await.succeeded();

    let (current, status) = index.settings().await;
    snapshot!(status, @"200 OK");
    snapshot!(current["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":false}"###);

    // 3. `binaryQuantized` explicitly enabled.
    let enabled = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
                "binaryQuantized": true,
            }
        },
    });
    let (task, status) = index.update_settings(enabled).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(task.uid()).await.succeeded();

    let (current, status) = index.settings().await;
    snapshot!(status, @"200 OK");
    snapshot!(current["embedders"]["manual"], @r###"{"source":"userProvided","dimensions":3,"binaryQuantized":true}"###);
}
/// Enables binary quantization on the `manual` embedder *before* any document is sent, then
/// checks that the vectors returned with the documents only contain `1.0` / `-1.0` components.
#[actix_rt::test]
async fn binary_quantize_before_sending_documents() {
    let server = Server::new().await;
    let index = server.index("doggo");

    // Enable the `vectorStore` feature before configuring any embedder.
    let features = json!({"vectorStore": true});
    let (enabled_features, status) = server.set_features(features).await;
    snapshot!(status, @"200 OK");
    snapshot!(enabled_features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

    // The embedder is quantized from the very beginning.
    let quantized_embedder = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
                "binaryQuantized": true,
            }
        },
    });
    let (settings_task, status) = index.update_settings(quantized_embedder).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(settings_task.uid()).await.succeeded();

    // Two documents carrying arbitrary (non-quantized) user-provided vectors.
    let payload = json!([
        {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
        {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
    ]);
    let (addition_task, status) = index.add_documents(payload, None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(addition_task.uid()).await.succeeded();

    // Make sure the documents are binary quantized
    let options = GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() };
    let (stored_documents, _status) = index.get_all_documents(options).await;
    snapshot!(json_string!(stored_documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
1.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
-1.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
/// Sends documents to a non-quantized `manual` embedder first and only then enables binary
/// quantization, checking that the already-stored vectors come back with `1.0` / `-1.0`
/// components.
#[actix_rt::test]
async fn binary_quantize_after_sending_documents() {
    let server = Server::new().await;
    let index = server.index("doggo");

    // Enable the `vectorStore` feature before configuring any embedder.
    let features = json!({"vectorStore": true});
    let (enabled_features, status) = server.set_features(features).await;
    snapshot!(status, @"200 OK");
    snapshot!(enabled_features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

    // Start with an embedder that does not mention `binaryQuantized`.
    let plain_embedder = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
            }
        },
    });
    let (settings_task, status) = index.update_settings(plain_embedder).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(settings_task.uid()).await.succeeded();

    // Two documents carrying arbitrary (non-quantized) user-provided vectors.
    let payload = json!([
        {"id": 0, "name": "kefir", "_vectors": { "manual": [-1.2, -2.3, 3.2] }},
        {"id": 1, "name": "echo", "_vectors": { "manual": [2.5, 1.5, -130] }},
    ]);
    let (addition_task, status) = index.add_documents(payload, None).await;
    snapshot!(status, @"202 Accepted");
    index.wait_task(addition_task.uid()).await.succeeded();

    // Turn the quantization on once the vectors are already stored.
    let quantized_embedder = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
                "binaryQuantized": true,
            }
        },
    });
    let (settings_task, status) = index.update_settings(quantized_embedder).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(settings_task.uid()).await.succeeded();

    // Make sure the documents are binary quantized
    let options = GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() };
    let (stored_documents, _status) = index.get_all_documents(options).await;
    snapshot!(json_string!(stored_documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {
"manual": {
"embeddings": [
[
-1.0,
-1.0,
1.0
]
],
"regenerate": false
}
}
},
{
"id": 1,
"name": "echo",
"_vectors": {
"manual": {
"embeddings": [
[
1.0,
1.0,
-1.0
]
],
"regenerate": false
}
}
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
/// Enables binary quantization on the `manual` embedder, then tries to turn it off again and
/// checks that the settings task fails with the `invalid_settings_embedders` error.
#[actix_rt::test]
async fn try_to_disable_binary_quantization() {
    let server = Server::new().await;
    let index = server.index("doggo");

    // Enable the `vectorStore` feature before configuring any embedder.
    let features = json!({"vectorStore": true});
    let (enabled_features, status) = server.set_features(features).await;
    snapshot!(status, @"200 OK");
    snapshot!(enabled_features, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

    // First update: quantization is turned on, which must succeed.
    let enable_quantization = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
                "binaryQuantized": true,
            }
        },
    });
    let (enable_task, status) = index.update_settings(enable_quantization).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(enable_task.uid()).await.succeeded();

    // Second update: the request is accepted (enqueued), but the task itself is expected to fail.
    let disable_quantization = json!({
        "embedders": {
            "manual": {
                "source": "userProvided",
                "dimensions": 3,
                "binaryQuantized": false,
            }
        },
    });
    let (disable_task, status) = index.update_settings(disable_quantization).await;
    snapshot!(status, @"202 Accepted");

    let finished_task = server.wait_task(disable_task.uid()).await;
    snapshot!(finished_task, @r###"
{
"uid": "[uid]",
"indexUid": "doggo",
"status": "failed",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
"binaryQuantized": false
}
}
},
"error": {
"message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.",
"code": "invalid_settings_embedders",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
/// Enables binary quantization on the `manual` embedder of a pre-populated index, clears all
/// documents, and checks that neither the document listing nor a vector search returns anything.
#[actix_rt::test]
async fn binary_quantize_clear_documents() {
    let server = Server::new().await;
    // NOTE(review): presumably creates an index with a `manual` user-provided embedder and a few
    // documents; confirm against the helper's definition in `crate::vector`.
    let index = generate_default_user_provided_documents(&server).await;

    // Only the quantization flag is sent for the `manual` embedder.
    let enable_quantization = json!({
        "embedders": {
            "manual": {
                "binaryQuantized": true,
            }
        },
    });
    let (settings_task, status) = index.update_settings(enable_quantization).await;
    snapshot!(status, @"202 Accepted");
    server.wait_task(settings_task.uid()).await.succeeded();

    let (clear_task, _status) = index.clear_all_documents().await;
    index.wait_task(clear_task.uid()).await.succeeded();

    // Make sure the documents DB has been cleared
    let options = GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() };
    let (remaining_documents, _status) = index.get_all_documents(options).await;
    snapshot!(json_string!(remaining_documents), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);

    // Make sure the arroy DB has been cleared
    let vector_query = json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] });
    let (search_response, _status) = index.search_post(vector_query).await;
    snapshot!(search_response, @r###"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0,
"semanticHitCount": 0
}
"###);
}

View File

@ -1,3 +1,4 @@
mod binary_quantized;
mod openai; mod openai;
mod rest; mod rest;
mod settings; mod settings;
@ -624,7 +625,8 @@ async fn clear_documents() {
"###); "###);
// Make sure the arroy DB has been cleared // Make sure the arroy DB has been cleared
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; let (documents, _code) =
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
snapshot!(documents, @r###" snapshot!(documents, @r###"
{ {
"hits": [], "hits": [],
@ -685,7 +687,11 @@ async fn add_remove_one_vector_4588() {
let task = index.wait_task(value.uid()).await; let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-deleted"); snapshot!(task, name: "document-deleted");
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await; let (documents, _code) = index
.search_post(
json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }),
)
.await;
snapshot!(documents, @r###" snapshot!(documents, @r###"
{ {
"hits": [ "hits": [

View File

@ -449,7 +449,7 @@ async fn it_works() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "chien de chasse", "q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"},
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -489,7 +489,7 @@ async fn it_works() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "petit chien", "q": "petit chien",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -529,7 +529,7 @@ async fn it_works() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "grand chien de berger des montagnes", "q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -616,7 +616,7 @@ async fn tokenize_long_text() {
"q": "grand chien de berger des montagnes", "q": "grand chien de berger des montagnes",
"showRankingScore": true, "showRankingScore": true,
"attributesToRetrieve": ["id"], "attributesToRetrieve": ["id"],
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1064,7 +1064,7 @@ async fn smaller_dimensions() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "chien de chasse", "q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1104,7 +1104,7 @@ async fn smaller_dimensions() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "petit chien", "q": "petit chien",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1144,7 +1144,7 @@ async fn smaller_dimensions() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "grand chien de berger des montagnes", "q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1295,7 +1295,7 @@ async fn small_embedding_model() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "chien de chasse", "q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1335,7 +1335,7 @@ async fn small_embedding_model() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "petit chien", "q": "petit chien",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1375,7 +1375,7 @@ async fn small_embedding_model() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "grand chien de berger des montagnes", "q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1525,7 +1525,7 @@ async fn legacy_embedding_model() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "chien de chasse", "q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1565,7 +1565,7 @@ async fn legacy_embedding_model() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "petit chien", "q": "petit chien",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1605,7 +1605,7 @@ async fn legacy_embedding_model() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "grand chien de berger des montagnes", "q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1756,7 +1756,7 @@ async fn it_still_works() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "chien de chasse", "q": "chien de chasse",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1796,7 +1796,7 @@ async fn it_still_works() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "petit chien", "q": "petit chien",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");
@ -1836,7 +1836,7 @@ async fn it_still_works() {
let (response, code) = index let (response, code) = index
.search_post(json!({ .search_post(json!({
"q": "grand chien de berger des montagnes", "q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 1.0} "hybrid": {"semanticRatio": 1.0, "embedder": "default"}
})) }))
.await; .await;
snapshot!(code, @"200 OK"); snapshot!(code, @"200 OK");

View File

@ -218,7 +218,8 @@ async fn reset_embedder_documents() {
"###); "###);
// Make sure the arroy DB has been cleared // Make sure the arroy DB has been cleared
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await; let (documents, _code) =
index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "default"} })).await;
snapshot!(json_string!(documents), @r###" snapshot!(json_string!(documents), @r###"
{ {
"message": "Cannot find embedder with name `default`.", "message": "Cannot find embedder with name `default`.",

View File

@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.9.1" bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0" byteorder = "1.5.0"
charabia = { version = "0.9.0", default-features = false } charabia = { version = "0.9.1", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13" crossbeam-channel = "0.5.13"
deserr = "0.6.2" deserr = "0.6.2"
@ -80,7 +80,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
tiktoken-rs = "0.5.9" tiktoken-rs = "0.5.9"
liquid = "0.26.6" liquid = "0.26.6"
rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] } rhai = { version = "1.19.0", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
arroy = "0.4.0" arroy = { git = "https://github.com/meilisearch/arroy/", rev = "2386594dfb009ce08821a925ccc89fb8e30bf73d" }
rand = "0.8.5" rand = "0.8.5"
tracing = "0.1.40" tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] } ureq = { version = "2.10.0", features = ["json"] }
@ -106,6 +106,8 @@ all-tokenizations = [
"charabia/greek", "charabia/greek",
"charabia/khmer", "charabia/khmer",
"charabia/vietnamese", "charabia/vietnamese",
"charabia/swedish-recomposition",
"charabia/german-segmentation",
] ]
# Use POSIX semaphores instead of SysV semaphores in LMDB # Use POSIX semaphores instead of SysV semaphores in LMDB
@ -138,6 +140,9 @@ khmer = ["charabia/khmer"]
# allow vietnamese specialized tokenization # allow vietnamese specialized tokenization
vietnamese = ["charabia/vietnamese"] vietnamese = ["charabia/vietnamese"]
# allow german specialized tokenization
german = ["charabia/german-segmentation"]
# force swedish character recomposition # force swedish character recomposition
swedish-recomposition = ["charabia/swedish-recomposition"] swedish-recomposition = ["charabia/swedish-recomposition"]

View File

@ -258,6 +258,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
}, },
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
InvalidSettingsDimensions { embedder_name: String }, InvalidSettingsDimensions { embedder_name: String },
#[error(
"`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors."
)]
InvalidDisableBinaryQuantization { embedder_name: String },
#[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")] #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String }, InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")] #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]

View File

@ -21,7 +21,7 @@ use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap; use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
use crate::vector::{Embedding, EmbeddingConfig}; use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
use crate::{ use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@ -162,7 +162,7 @@ pub struct Index {
/// Maps an embedder name to its id in the arroy store. /// Maps an embedder name to its id in the arroy store.
pub embedder_category_id: Database<Str, U8>, pub embedder_category_id: Database<Str, U8>,
/// Vector store based on arroy™. /// Vector store based on arroy™.
pub vector_arroy: arroy::Database<arroy::distances::Angular>, pub vector_arroy: arroy::Database<Unspecified>,
/// Maps the document id to the document as an obkv store. /// Maps the document id to the document as an obkv store.
pub(crate) documents: Database<BEU32, ObkvCodec>, pub(crate) documents: Database<BEU32, ObkvCodec>,
@ -1614,15 +1614,17 @@ impl Index {
&'a self, &'a self,
rtxn: &'a RoTxn<'a>, rtxn: &'a RoTxn<'a>,
embedder_id: u8, embedder_id: u8,
) -> impl Iterator<Item = Result<arroy::Reader<'a, arroy::distances::Angular>>> + 'a { quantized: bool,
) -> impl Iterator<Item = Result<ArroyWrapper>> + 'a {
crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| { crate::vector::arroy_db_range_for_embedder(embedder_id).map_while(move |k| {
arroy::Reader::open(rtxn, k, self.vector_arroy) let reader = ArroyWrapper::new(self.vector_arroy, k, quantized);
.map(Some) // Here we don't care about the dimensions, but we want to know if we can read
.or_else(|e| match e { // in the database or if its metadata are missing because there is no document with that many vectors.
arroy::Error::MissingMetadata(_) => Ok(None), match reader.dimensions(rtxn) {
e => Err(e.into()), Ok(_) => Some(Ok(reader)),
}) Err(arroy::Error::MissingMetadata(_)) => None,
.transpose() Err(e) => Some(Err(e.into())),
}
}) })
} }
@ -1644,32 +1646,18 @@ impl Index {
docid: DocumentId, docid: DocumentId,
) -> Result<BTreeMap<String, Vec<Embedding>>> { ) -> Result<BTreeMap<String, Vec<Embedding>>> {
let mut res = BTreeMap::new(); let mut res = BTreeMap::new();
for row in self.embedder_category_id.iter(rtxn)? { let embedding_configs = self.embedding_configs(rtxn)?;
let (embedder_name, embedder_id) = row?; for config in embedding_configs {
let embedder_id = (embedder_id as u16) << 8; let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
let mut embeddings = Vec::new(); let embeddings = self
'vectors: for i in 0..=u8::MAX { .arroy_readers(rtxn, embedder_id, config.config.quantized())
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy) .map_while(|reader| {
.map(Some) reader
.or_else(|e| match e { .and_then(|r| r.item_vector(rtxn, docid).map_err(|e| e.into()))
arroy::Error::MissingMetadata(_) => Ok(None), .transpose()
e => Err(e),
}) })
.transpose(); .collect::<Result<Vec<_>>>()?;
res.insert(config.name.to_owned(), embeddings);
let Some(reader) = reader else {
break 'vectors;
};
let embedding = reader?.item_vector(rtxn, docid)?;
if let Some(embedding) = embedding {
embeddings.push(embedding)
} else {
break 'vectors;
}
}
res.insert(embedder_name.to_owned(), embeddings);
} }
Ok(res) Ok(res)
} }

View File

@ -1,4 +1,5 @@
use std::collections::{BTreeMap, HashMap, HashSet}; use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Display;
use std::ops::ControlFlow; use std::ops::ControlFlow;
use std::{fmt, mem}; use std::{fmt, mem};
@ -37,6 +38,15 @@ pub enum OrderBy {
Count, Count,
} }
impl Display for OrderBy {
    /// Writes a human-readable description of the ordering:
    /// `"alphabetically"` for [`OrderBy::Lexicographic`] and `"by count"` for [`OrderBy::Count`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the label first so there is a single write call.
        let label = match self {
            OrderBy::Lexicographic => "alphabetically",
            OrderBy::Count => "by count",
        };
        f.write_str(label)
    }
}
pub struct FacetDistribution<'a> { pub struct FacetDistribution<'a> {
facets: Option<HashMap<String, OrderBy>>, facets: Option<HashMap<String, OrderBy>>,
candidates: Option<RoaringBitmap>, candidates: Option<RoaringBitmap>,
@ -100,7 +110,6 @@ impl<'a> FacetDistribution<'a> {
let mut lexicographic_distribution = BTreeMap::new(); let mut lexicographic_distribution = BTreeMap::new();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let distribution_prelength = distribution.len();
let db = self.index.field_id_docid_facet_f64s; let db = self.index.field_id_docid_facet_f64s;
for docid in candidates { for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>()); key_buffer.truncate(mem::size_of::<FieldId>());
@ -113,23 +122,21 @@ impl<'a> FacetDistribution<'a> {
for result in iter { for result in iter {
let ((_, _, value), ()) = result?; let ((_, _, value), ()) = result?;
*lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1; *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1;
if lexicographic_distribution.len() - distribution_prelength
== self.max_values_per_facet
{
break;
}
} }
} }
distribution.extend(lexicographic_distribution); distribution.extend(
lexicographic_distribution
.into_iter()
.take(self.max_values_per_facet.saturating_sub(distribution.len())),
);
} }
FacetType::String => { FacetType::String => {
let mut normalized_distribution = BTreeMap::new(); let mut normalized_distribution = BTreeMap::new();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let db = self.index.field_id_docid_facet_strings; let db = self.index.field_id_docid_facet_strings;
'outer: for docid in candidates { for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>()); key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes()); key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db let iter = db
@ -144,14 +151,14 @@ impl<'a> FacetDistribution<'a> {
.or_insert_with(|| (original_value, 0)); .or_insert_with(|| (original_value, 0));
*count += 1; *count += 1;
if normalized_distribution.len() == self.max_values_per_facet { // we'd like to break here if we have enough facet values, but we are collecting them by increasing docid,
break 'outer; // so higher ranked facets could be in later docids
}
} }
} }
let iter = normalized_distribution let iter = normalized_distribution
.into_iter() .into_iter()
.take(self.max_values_per_facet.saturating_sub(distribution.len()))
.map(|(_normalized, (original, count))| (original.to_string(), count)); .map(|(_normalized, (original, count))| (original.to_string(), count));
distribution.extend(iter); distribution.extend(iter);
} }
@ -467,7 +474,7 @@ mod tests {
.execute() .execute()
.unwrap(); .unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###); milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###);
let map = FacetDistribution::new(&txn, &index) let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::Count))) .facets(iter::once(("colour", OrderBy::Count)))

View File

@ -12,7 +12,7 @@ use serde_json::Value;
use super::facet_range_search; use super::facet_range_search;
use crate::error::{Error, UserError}; use crate::error::{Error, UserError};
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
}; };
use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{ use crate::{
@ -336,6 +336,24 @@ impl<'a> Filter<'a> {
return Ok(docids); return Ok(docids);
} }
Condition::StartsWith { keyword: _, word } => {
let value = crate::normalize_facet(word.value());
let base = FacetGroupKey { field_id, level: 0, left_bound: value.as_str() };
let docids = strings_db
.prefix_iter(rtxn, &base)?
.map(|result| -> Result<RoaringBitmap> {
match result {
Ok((_facet_group_key, FacetGroupValue { bitmap, .. })) => Ok(bitmap),
Err(_e) => Err(InternalError::from(SerializationError::Decoding {
db_name: Some(FACET_ID_STRING_DOCIDS),
})
.into()),
}
})
.union()?;
return Ok(docids);
}
}; };
let mut output = RoaringBitmap::new(); let mut output = RoaringBitmap::new();

View File

@ -190,7 +190,7 @@ impl<'a> Search<'a> {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
}; };
// no embedder, no semantic search // no embedder, no semantic search
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else { let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results)); return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
}; };
@ -212,7 +212,7 @@ impl<'a> Search<'a> {
}; };
search.semantic = search.semantic =
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder }); Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized });
// TODO: would be better to have two distinct functions at this point // TODO: would be better to have two distinct functions at this point
let vector_results = search.execute()?; let vector_results = search.execute()?;

View File

@ -32,6 +32,7 @@ pub struct SemanticSearch {
vector: Option<Vec<f32>>, vector: Option<Vec<f32>>,
embedder_name: String, embedder_name: String,
embedder: Arc<Embedder>, embedder: Arc<Embedder>,
quantized: bool,
} }
pub struct Search<'a> { pub struct Search<'a> {
@ -89,9 +90,10 @@ impl<'a> Search<'a> {
&mut self, &mut self,
embedder_name: String, embedder_name: String,
embedder: Arc<Embedder>, embedder: Arc<Embedder>,
quantized: bool,
vector: Option<Vec<f32>>, vector: Option<Vec<f32>>,
) -> &mut Search<'a> { ) -> &mut Search<'a> {
self.semantic = Some(SemanticSearch { embedder_name, embedder, vector }); self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector });
self self
} }
@ -206,7 +208,7 @@ impl<'a> Search<'a> {
degraded, degraded,
used_negative_operator, used_negative_operator,
} = match self.semantic.as_ref() { } = match self.semantic.as_ref() {
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder }) => { Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => {
execute_vector_search( execute_vector_search(
&mut ctx, &mut ctx,
vector, vector,
@ -219,6 +221,7 @@ impl<'a> Search<'a> {
self.limit, self.limit,
embedder_name, embedder_name,
embedder, embedder,
*quantized,
self.time_budget.clone(), self.time_budget.clone(),
self.ranking_score_threshold, self.ranking_score_threshold,
)? )?

View File

@ -312,6 +312,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
Ok(ranking_rules) Ok(ranking_rules)
} }
#[allow(clippy::too_many_arguments)]
fn get_ranking_rules_for_vector<'ctx>( fn get_ranking_rules_for_vector<'ctx>(
ctx: &SearchContext<'ctx>, ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
@ -320,6 +321,7 @@ fn get_ranking_rules_for_vector<'ctx>(
target: &[f32], target: &[f32],
embedder_name: &str, embedder_name: &str,
embedder: &Embedder, embedder: &Embedder,
quantized: bool,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> { ) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
// query graph search // query graph search
@ -347,6 +349,7 @@ fn get_ranking_rules_for_vector<'ctx>(
limit_plus_offset, limit_plus_offset,
embedder_name, embedder_name,
embedder, embedder,
quantized,
)?; )?;
ranking_rules.push(Box::new(vector_sort)); ranking_rules.push(Box::new(vector_sort));
vector = true; vector = true;
@ -576,6 +579,7 @@ pub fn execute_vector_search(
length: usize, length: usize,
embedder_name: &str, embedder_name: &str,
embedder: &Embedder, embedder: &Embedder,
quantized: bool,
time_budget: TimeBudget, time_budget: TimeBudget,
ranking_score_threshold: Option<f64>, ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> { ) -> Result<PartialSearchResult> {
@ -591,6 +595,7 @@ pub fn execute_vector_search(
vector, vector,
embedder_name, embedder_name,
embedder, embedder,
quantized,
)?; )?;
let mut placeholder_search_logger = logger::DefaultSearchLogger; let mut placeholder_search_logger = logger::DefaultSearchLogger;

View File

@ -16,6 +16,7 @@ pub struct VectorSort<Q: RankingRuleQueryTrait> {
limit: usize, limit: usize,
distribution_shift: Option<DistributionShift>, distribution_shift: Option<DistributionShift>,
embedder_index: u8, embedder_index: u8,
quantized: bool,
} }
impl<Q: RankingRuleQueryTrait> VectorSort<Q> { impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
@ -26,6 +27,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
limit: usize, limit: usize,
embedder_name: &str, embedder_name: &str,
embedder: &Embedder, embedder: &Embedder,
quantized: bool,
) -> Result<Self> { ) -> Result<Self> {
let embedder_index = ctx let embedder_index = ctx
.index .index
@ -41,6 +43,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
limit, limit,
distribution_shift: embedder.distribution(), distribution_shift: embedder.distribution(),
embedder_index, embedder_index,
quantized,
}) })
} }
@ -49,16 +52,12 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
ctx: &mut SearchContext<'_>, ctx: &mut SearchContext<'_>,
vector_candidates: &RoaringBitmap, vector_candidates: &RoaringBitmap,
) -> Result<()> { ) -> Result<()> {
let readers: std::result::Result<Vec<_>, _> =
ctx.index.arroy_readers(ctx.txn, self.embedder_index).collect();
let readers = readers?;
let target = &self.target; let target = &self.target;
let mut results = Vec::new(); let mut results = Vec::new();
for reader in readers.iter() { for reader in ctx.index.arroy_readers(ctx.txn, self.embedder_index, self.quantized) {
let nns_by_vector = let nns_by_vector =
reader.nns_by_vector(ctx.txn, target, self.limit, None, Some(vector_candidates))?; reader?.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
results.extend(nns_by_vector.into_iter()); results.extend(nns_by_vector.into_iter());
} }
results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance)); results.sort_unstable_by_key(|(_, distance)| OrderedFloat(*distance));

View File

@ -18,9 +18,11 @@ pub struct Similar<'a> {
embedder_name: String, embedder_name: String,
embedder: Arc<Embedder>, embedder: Arc<Embedder>,
ranking_score_threshold: Option<f64>, ranking_score_threshold: Option<f64>,
quantized: bool,
} }
impl<'a> Similar<'a> { impl<'a> Similar<'a> {
#[allow(clippy::too_many_arguments)]
pub fn new( pub fn new(
id: DocumentId, id: DocumentId,
offset: usize, offset: usize,
@ -29,6 +31,7 @@ impl<'a> Similar<'a> {
rtxn: &'a heed::RoTxn<'a>, rtxn: &'a heed::RoTxn<'a>,
embedder_name: String, embedder_name: String,
embedder: Arc<Embedder>, embedder: Arc<Embedder>,
quantized: bool,
) -> Self { ) -> Self {
Self { Self {
id, id,
@ -40,6 +43,7 @@ impl<'a> Similar<'a> {
embedder_name, embedder_name,
embedder, embedder,
ranking_score_threshold: None, ranking_score_threshold: None,
quantized,
} }
} }
@ -67,19 +71,13 @@ impl<'a> Similar<'a> {
.get(self.rtxn, &self.embedder_name)? .get(self.rtxn, &self.embedder_name)?
.ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?; .ok_or_else(|| crate::UserError::InvalidEmbedder(self.embedder_name.to_owned()))?;
let readers: std::result::Result<Vec<_>, _> =
self.index.arroy_readers(self.rtxn, embedder_index).collect();
let readers = readers?;
let mut results = Vec::new(); let mut results = Vec::new();
for reader in readers.iter() { for reader in self.index.arroy_readers(self.rtxn, embedder_index, self.quantized) {
let nns_by_item = reader.nns_by_item( let nns_by_item = reader?.nns_by_item(
self.rtxn, self.rtxn,
self.id, self.id,
self.limit + self.offset + 1, self.limit + self.offset + 1,
None,
Some(&universe), Some(&universe),
)?; )?;
if let Some(mut nns_by_item) = nns_by_item { if let Some(mut nns_by_item) = nns_by_item {

View File

@ -20,7 +20,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff; use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
use crate::vector::settings::{EmbedderAction, ReindexAction}; use crate::vector::settings::ReindexAction;
use crate::vector::{Embedder, Embeddings}; use crate::vector::{Embedder, Embeddings};
use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort}; use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
@ -208,10 +208,9 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
if reindex_vectors { if reindex_vectors {
for (name, action) in settings_diff.embedding_config_updates.iter() { for (name, action) in settings_diff.embedding_config_updates.iter() {
match action { if let Some(action) = action.reindex() {
EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted let Some((embedder_name, (embedder, prompt, _quantized))) =
EmbedderAction::Reindex(action) => { configs.remove_entry(name)
let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name)
else { else {
tracing::error!(embedder = name, "Requested embedder config not found"); tracing::error!(embedder = name, "Requested embedder config not found");
continue; continue;
@ -241,7 +240,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
let action = match action { let action = match action {
ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex, ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
ReindexAction::RegeneratePrompts => { ReindexAction::RegeneratePrompts => {
let Some((_, old_prompt)) = old_configs.get(name) else { let Some((_, old_prompt, _quantized)) = old_configs.get(name) else {
tracing::error!(embedder = name, "Old embedder config not found"); tracing::error!(embedder = name, "Old embedder config not found");
continue; continue;
}; };
@ -260,13 +259,14 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
add_to_user_provided: RoaringBitmap::new(), add_to_user_provided: RoaringBitmap::new(),
action, action,
}); });
} } else {
continue;
} }
} }
} else { } else {
// document operation // document operation
for (embedder_name, (embedder, prompt)) in configs.into_iter() { for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() {
// (docid, _index) -> KvWriterDelAdd -> Vector // (docid, _index) -> KvWriterDelAdd -> Vector
let manual_vectors_writer = create_writer( let manual_vectors_writer = create_writer(
indexer.chunk_compression_type, indexer.chunk_compression_type,

View File

@ -43,7 +43,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs;
use crate::update::{ use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst, IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
}; };
use crate::vector::EmbeddingConfigs; use crate::vector::{ArroyWrapper, EmbeddingConfigs};
use crate::{CboRoaringBitmapCodec, Index, Object, Result}; use crate::{CboRoaringBitmapCodec, Index, Object, Result};
static MERGED_DATABASE_COUNT: usize = 7; static MERGED_DATABASE_COUNT: usize = 7;
@ -679,6 +679,24 @@ where
let number_of_documents = self.index.number_of_documents(self.wtxn)?; let number_of_documents = self.index.number_of_documents(self.wtxn)?;
let mut rng = rand::rngs::StdRng::seed_from_u64(42); let mut rng = rand::rngs::StdRng::seed_from_u64(42);
// If an embedder wasn't used in the typedchunk but must be binary quantized
// we should insert it in `dimension`
for (name, action) in settings_diff.embedding_config_updates.iter() {
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or(
InternalError::DatabaseMissingEntry {
db_name: "embedder_category_id",
key: None,
},
)?;
let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap();
let reader =
ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized);
let dim = reader.dimensions(self.wtxn)?;
dimension.insert(name.to_string(), dim);
}
}
for (embedder_name, dimension) in dimension { for (embedder_name, dimension) in dimension {
let wtxn = &mut *self.wtxn; let wtxn = &mut *self.wtxn;
let vector_arroy = self.index.vector_arroy; let vector_arroy = self.index.vector_arroy;
@ -686,13 +704,23 @@ where
let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
)?; )?;
let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name);
let was_quantized = settings_diff
.old
.embedding_configs
.get(&embedder_name)
.map_or(false, |conf| conf.2);
let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);
pool.install(|| { pool.install(|| {
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) { for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
let writer = arroy::Writer::new(vector_arroy, k, dimension); let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized);
if writer.need_build(wtxn)? { if is_quantizing {
writer.build(wtxn, &mut rng, None)?; writer.quantize(wtxn, k, dimension)?;
} else if writer.is_empty(wtxn)? { }
if writer.need_build(wtxn, dimension)? {
writer.build(wtxn, &mut rng, dimension)?;
} else if writer.is_empty(wtxn, dimension)? {
break; break;
} }
} }
@ -2746,6 +2774,7 @@ mod tests {
response: Setting::NotSet, response: Setting::NotSet,
distribution: Setting::NotSet, distribution: Setting::NotSet,
headers: Setting::NotSet, headers: Setting::NotSet,
binary_quantized: Setting::NotSet,
}), }),
); );
settings.set_embedder_settings(embedders); settings.set_embedder_settings(embedders);
@ -2774,7 +2803,7 @@ mod tests {
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap()); std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
let res = index let res = index
.search(&rtxn) .search(&rtxn)
.semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec())) .semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))
.execute() .execute()
.unwrap(); .unwrap();
assert_eq!(res.documents_ids.len(), 3); assert_eq!(res.documents_ids.len(), 3);

View File

@ -28,7 +28,8 @@ use crate::update::index_documents::GrenadParameters;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::{EmbedderAction, WriteBackToDocuments}; use crate::vector::settings::WriteBackToDocuments;
use crate::vector::ArroyWrapper;
use crate::{ use crate::{
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
}; };
@ -989,19 +990,17 @@ impl<'a, 'i> Transform<'a, 'i> {
None None
}; };
let readers: Result< let readers: Result<BTreeMap<&str, (Vec<ArroyWrapper>, &RoaringBitmap)>> = settings_diff
BTreeMap<&str, (Vec<arroy::Reader<'_, arroy::distances::Angular>>, &RoaringBitmap)>,
> = settings_diff
.embedding_config_updates .embedding_config_updates
.iter() .iter()
.filter_map(|(name, action)| { .filter_map(|(name, action)| {
if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
embedder_id, action.write_back()
user_provided,
}) = action
{ {
let readers: Result<Vec<_>> = let readers: Result<Vec<_>> = self
self.index.arroy_readers(wtxn, *embedder_id).collect(); .index
.arroy_readers(wtxn, *embedder_id, action.was_quantized)
.collect();
match readers { match readers {
Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))), Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))),
Err(error) => Some(Err(error)), Err(error) => Some(Err(error)),
@ -1104,23 +1103,14 @@ impl<'a, 'i> Transform<'a, 'i> {
} }
} }
let mut writers = Vec::new();
// delete all vectors from the embedders that need removal // delete all vectors from the embedders that need removal
for (_, (readers, _)) in readers { for (_, (readers, _)) in readers {
for reader in readers { for reader in readers {
let dimensions = reader.dimensions(); let dimensions = reader.dimensions(wtxn)?;
let arroy_index = reader.index(); reader.clear(wtxn, dimensions)?;
drop(reader);
let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions);
writers.push(writer);
} }
} }
for writer in writers {
writer.clear(wtxn)?;
}
let grenad_params = GrenadParameters { let grenad_params = GrenadParameters {
chunk_compression_type: self.indexer_settings.chunk_compression_type, chunk_compression_type: self.indexer_settings.chunk_compression_type,
chunk_compression_level: self.indexer_settings.chunk_compression_level, chunk_compression_level: self.indexer_settings.chunk_compression_level,

View File

@ -27,6 +27,7 @@ use crate::update::index_documents::helpers::{
as_cloneable_grenad, keep_latest_obkv, try_split_array_at, as_cloneable_grenad, keep_latest_obkv, try_split_array_at,
}; };
use crate::update::settings::InnerIndexSettingsDiff; use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::ArroyWrapper;
use crate::{ use crate::{
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError, lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec, Result, SerializationError, U8StrStrCodec,
@ -666,9 +667,14 @@ pub(crate) fn write_typed_chunk_into_index(
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or( let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None }, InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
)?; )?;
let binary_quantized = settings_diff
.old
.embedding_configs
.get(&embedder_name)
.map_or(false, |conf| conf.2);
// FIXME: allow customizing distance // FIXME: allow customizing distance
let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index) let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
.map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension)) .map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized))
.collect(); .collect();
// remove vectors for docids we want them removed // remove vectors for docids we want them removed
@ -679,7 +685,7 @@ pub(crate) fn write_typed_chunk_into_index(
for writer in &writers { for writer in &writers {
// Uses invariant: vectors are packed in the first writers. // Uses invariant: vectors are packed in the first writers.
if !writer.del_item(wtxn, docid)? { if !writer.del_item(wtxn, expected_dimension, docid)? {
break; break;
} }
} }
@ -711,7 +717,7 @@ pub(crate) fn write_typed_chunk_into_index(
))); )));
} }
for (embedding, writer) in embeddings.iter().zip(&writers) { for (embedding, writer) in embeddings.iter().zip(&writers) {
writer.add_item(wtxn, docid, embedding)?; writer.add_item(wtxn, expected_dimension, docid, embedding)?;
} }
} }
@ -734,7 +740,7 @@ pub(crate) fn write_typed_chunk_into_index(
break; break;
}; };
if candidate == vector { if candidate == vector {
writer.del_item(wtxn, docid)?; writer.del_item(wtxn, expected_dimension, docid)?;
deleted_index = Some(index); deleted_index = Some(index);
} }
} }
@ -751,8 +757,13 @@ pub(crate) fn write_typed_chunk_into_index(
if let Some((last_index, vector)) = last_index_with_a_vector { if let Some((last_index, vector)) = last_index_with_a_vector {
// unwrap: computed the index from the list of writers // unwrap: computed the index from the list of writers
let writer = writers.get(last_index).unwrap(); let writer = writers.get(last_index).unwrap();
writer.del_item(wtxn, docid)?; writer.del_item(wtxn, expected_dimension, docid)?;
writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?; writers.get(deleted_index).unwrap().add_item(
wtxn,
expected_dimension,
docid,
&vector,
)?;
} }
} }
} }
@ -762,8 +773,8 @@ pub(crate) fn write_typed_chunk_into_index(
// overflow was detected during vector extraction. // overflow was detected during vector extraction.
for writer in &writers { for writer in &writers {
if !writer.contains_item(wtxn, docid)? { if !writer.contains_item(wtxn, expected_dimension, docid)? {
writer.add_item(wtxn, docid, &vector)?; writer.add_item(wtxn, expected_dimension, docid, &vector)?;
break; break;
} }
} }

View File

@ -954,7 +954,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
let old_configs = self.index.embedding_configs(self.wtxn)?; let old_configs = self.index.embedding_configs(self.wtxn)?;
let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs
.into_iter() .into_iter()
.map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> { .map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> {
let embedder_id = let embedder_id =
self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or( self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or(
crate::InternalError::DatabaseMissingEntry { crate::InternalError::DatabaseMissingEntry {
@ -964,10 +964,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
)?; )?;
Ok(( Ok((
name, name,
EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { EmbedderAction::with_write_back(
embedder_id, WriteBackToDocuments { embedder_id, user_provided },
user_provided, config.quantized(),
}), ),
)) ))
}) })
.collect(); .collect();
@ -1004,7 +1004,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
match joined { match joined {
// updated config // updated config
EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => { EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => {
let settings_diff = SettingsDiff::from_settings(old, new); let was_quantized = old.binary_quantized.set().unwrap_or_default();
let settings_diff = SettingsDiff::from_settings(&name, old, new)?;
match settings_diff { match settings_diff {
SettingsDiff::Remove => { SettingsDiff::Remove => {
tracing::debug!( tracing::debug!(
@ -1023,25 +1024,29 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.index.embedder_category_id.delete(self.wtxn, &name)?; self.index.embedder_category_id.delete(self.wtxn, &name)?;
embedder_actions.insert( embedder_actions.insert(
name, name,
EmbedderAction::WriteBackToDocuments(WriteBackToDocuments { EmbedderAction::with_write_back(
embedder_id, WriteBackToDocuments { embedder_id, user_provided },
user_provided, was_quantized,
}), ),
); );
} }
SettingsDiff::Reindex { action, updated_settings } => { SettingsDiff::Reindex { action, updated_settings, quantize } => {
tracing::debug!( tracing::debug!(
embedder = name, embedder = name,
user_provided = user_provided.len(), user_provided = user_provided.len(),
?action, ?action,
"reindex embedder" "reindex embedder"
); );
embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action)); embedder_actions.insert(
name.clone(),
EmbedderAction::with_reindex(action, was_quantized)
.with_is_being_quantized(quantize),
);
let new = let new =
validate_embedding_settings(Setting::Set(updated_settings), &name)?; validate_embedding_settings(Setting::Set(updated_settings), &name)?;
updated_configs.insert(name, (new, user_provided)); updated_configs.insert(name, (new, user_provided));
} }
SettingsDiff::UpdateWithoutReindex { updated_settings } => { SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => {
tracing::debug!( tracing::debug!(
embedder = name, embedder = name,
user_provided = user_provided.len(), user_provided = user_provided.len(),
@ -1049,6 +1054,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
); );
let new = let new =
validate_embedding_settings(Setting::Set(updated_settings), &name)?; validate_embedding_settings(Setting::Set(updated_settings), &name)?;
if quantize {
embedder_actions.insert(
name.clone(),
EmbedderAction::default().with_is_being_quantized(true),
);
}
updated_configs.insert(name, (new, user_provided)); updated_configs.insert(name, (new, user_provided));
} }
} }
@ -1067,8 +1078,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
&mut setting, &mut setting,
); );
let setting = validate_embedding_settings(setting, &name)?; let setting = validate_embedding_settings(setting, &name)?;
embedder_actions embedder_actions.insert(
.insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex)); name.clone(),
EmbedderAction::with_reindex(ReindexAction::FullReindex, false),
);
updated_configs.insert(name, (setting, RoaringBitmap::new())); updated_configs.insert(name, (setting, RoaringBitmap::new()));
} }
} }
@ -1082,21 +1095,16 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
let mut find_free_index = let mut find_free_index =
move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8); move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8);
for (name, action) in embedder_actions.iter() { for (name, action) in embedder_actions.iter() {
match action { // ignore actions that are not possible for a new embedder
EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { if matches!(action.reindex(), Some(ReindexAction::FullReindex))
/* cannot be a new embedder, so has to have an id already */ && self.index.embedder_category_id.get(self.wtxn, name)?.is_none()
} {
EmbedderAction::Reindex(ReindexAction::FullReindex) => { let id =
if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() { find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
let id = find_free_index()
.ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
tracing::debug!(embedder = name, id, "assigning free id to new embedder"); tracing::debug!(embedder = name, id, "assigning free id to new embedder");
self.index.embedder_category_id.put(self.wtxn, name, &id)?; self.index.embedder_category_id.put(self.wtxn, name, &id)?;
} }
} }
EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ }
}
}
let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs
.into_iter() .into_iter()
.filter_map(|(name, (config, user_provided))| match config { .filter_map(|(name, (config, user_provided))| match config {
@ -1277,7 +1285,11 @@ impl InnerIndexSettingsDiff {
// if the user-defined searchables changed, then we need to reindex prompts. // if the user-defined searchables changed, then we need to reindex prompts.
if cache_user_defined_searchables { if cache_user_defined_searchables {
for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() { for (embedder_name, (config, _, _quantized)) in
new_settings.embedding_configs.inner_as_ref()
{
let was_quantized =
old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2);
// skip embedders that don't use document templates // skip embedders that don't use document templates
if !config.uses_document_template() { if !config.uses_document_template() {
continue; continue;
@ -1287,16 +1299,19 @@ impl InnerIndexSettingsDiff {
// this always makes the code clearer by explicitly handling the cases // this always makes the code clearer by explicitly handling the cases
match embedding_config_updates.entry(embedder_name.clone()) { match embedding_config_updates.entry(embedder_name.clone()) {
std::collections::btree_map::Entry::Vacant(entry) => { std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts)); entry.insert(EmbedderAction::with_reindex(
ReindexAction::RegeneratePrompts,
was_quantized,
));
} }
std::collections::btree_map::Entry::Occupied(entry) => match entry.get() { std::collections::btree_map::Entry::Occupied(entry) => {
EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */ let EmbedderAction {
was_quantized: _,
is_being_quantized: _,
write_back: _, // We are deleting this embedder, so no point in regeneration
reindex: _, // We are already fully reindexing
} = entry.get();
} }
EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */
}
EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */
}
},
}; };
} }
} }
@ -1546,7 +1561,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
.map( .map(
|IndexEmbeddingConfig { |IndexEmbeddingConfig {
name, name,
config: EmbeddingConfig { embedder_options, prompt }, config: EmbeddingConfig { embedder_options, prompt, quantized },
.. ..
}| { }| {
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?); let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
@ -1556,7 +1571,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
.map_err(crate::vector::Error::from) .map_err(crate::vector::Error::from)
.map_err(crate::Error::from)?, .map_err(crate::Error::from)?,
); );
Ok((name, (embedder, prompt))) Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
}, },
) )
.collect(); .collect();
@ -1581,6 +1596,7 @@ fn validate_prompt(
response, response,
distribution, distribution,
headers, headers,
binary_quantized: binary_quantize,
}) => { }) => {
let max_bytes = match document_template_max_bytes.set() { let max_bytes = match document_template_max_bytes.set() {
Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| { Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
@ -1613,6 +1629,7 @@ fn validate_prompt(
response, response,
distribution, distribution,
headers, headers,
binary_quantized: binary_quantize,
})) }))
} }
new => Ok(new), new => Ok(new),
@ -1638,6 +1655,7 @@ pub fn validate_embedding_settings(
response, response,
distribution, distribution,
headers, headers,
binary_quantized: binary_quantize,
} = settings; } = settings;
if let Some(0) = dimensions.set() { if let Some(0) = dimensions.set() {
@ -1678,6 +1696,7 @@ pub fn validate_embedding_settings(
response, response,
distribution, distribution,
headers, headers,
binary_quantized: binary_quantize,
})); }));
}; };
match inferred_source { match inferred_source {
@ -1779,6 +1798,7 @@ pub fn validate_embedding_settings(
response, response,
distribution, distribution,
headers, headers,
binary_quantized: binary_quantize,
})) }))
} }

View File

@ -1,8 +1,12 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use arroy::distances::{Angular, BinaryQuantizedAngular};
use arroy::ItemId;
use deserr::{DeserializeError, Deserr}; use deserr::{DeserializeError, Deserr};
use heed::{RoTxn, RwTxn, Unspecified};
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use self::error::{EmbedError, NewEmbedderError}; use self::error::{EmbedError, NewEmbedderError};
@ -26,6 +30,171 @@ pub type Embedding = Vec<f32>;
pub const REQUEST_PARALLELISM: usize = 40; pub const REQUEST_PARALLELISM: usize = 40;
pub struct ArroyWrapper {
quantized: bool,
index: u16,
database: arroy::Database<Unspecified>,
}
impl ArroyWrapper {
pub fn new(database: arroy::Database<Unspecified>, index: u16, quantized: bool) -> Self {
Self { database, index, quantized }
}
pub fn index(&self) -> u16 {
self.index
}
pub fn dimensions(&self, rtxn: &RoTxn) -> Result<usize, arroy::Error> {
if self.quantized {
Ok(arroy::Reader::open(rtxn, self.index, self.quantized_db())?.dimensions())
} else {
Ok(arroy::Reader::open(rtxn, self.index, self.angular_db())?.dimensions())
}
}
pub fn quantize(
&mut self,
wtxn: &mut RwTxn,
index: u16,
dimension: usize,
) -> Result<(), arroy::Error> {
if !self.quantized {
let writer = arroy::Writer::new(self.angular_db(), index, dimension);
writer.prepare_changing_distance::<BinaryQuantizedAngular>(wtxn)?;
self.quantized = true;
}
Ok(())
}
pub fn need_build(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
if self.quantized {
arroy::Writer::new(self.quantized_db(), self.index, dimension).need_build(rtxn)
} else {
arroy::Writer::new(self.angular_db(), self.index, dimension).need_build(rtxn)
}
}
pub fn build<R: rand::Rng + rand::SeedableRng>(
&self,
wtxn: &mut RwTxn,
rng: &mut R,
dimension: usize,
) -> Result<(), arroy::Error> {
if self.quantized {
arroy::Writer::new(self.quantized_db(), self.index, dimension).build(wtxn, rng, None)
} else {
arroy::Writer::new(self.angular_db(), self.index, dimension).build(wtxn, rng, None)
}
}
pub fn add_item(
&self,
wtxn: &mut RwTxn,
dimension: usize,
item_id: arroy::ItemId,
vector: &[f32],
) -> Result<(), arroy::Error> {
if self.quantized {
arroy::Writer::new(self.quantized_db(), self.index, dimension)
.add_item(wtxn, item_id, vector)
} else {
arroy::Writer::new(self.angular_db(), self.index, dimension)
.add_item(wtxn, item_id, vector)
}
}
pub fn del_item(
&self,
wtxn: &mut RwTxn,
dimension: usize,
item_id: arroy::ItemId,
) -> Result<bool, arroy::Error> {
if self.quantized {
arroy::Writer::new(self.quantized_db(), self.index, dimension).del_item(wtxn, item_id)
} else {
arroy::Writer::new(self.angular_db(), self.index, dimension).del_item(wtxn, item_id)
}
}
pub fn clear(&self, wtxn: &mut RwTxn, dimension: usize) -> Result<(), arroy::Error> {
if self.quantized {
arroy::Writer::new(self.quantized_db(), self.index, dimension).clear(wtxn)
} else {
arroy::Writer::new(self.angular_db(), self.index, dimension).clear(wtxn)
}
}
pub fn is_empty(&self, rtxn: &RoTxn, dimension: usize) -> Result<bool, arroy::Error> {
if self.quantized {
arroy::Writer::new(self.quantized_db(), self.index, dimension).is_empty(rtxn)
} else {
arroy::Writer::new(self.angular_db(), self.index, dimension).is_empty(rtxn)
}
}
pub fn contains_item(
&self,
rtxn: &RoTxn,
dimension: usize,
item: arroy::ItemId,
) -> Result<bool, arroy::Error> {
if self.quantized {
arroy::Writer::new(self.quantized_db(), self.index, dimension).contains_item(rtxn, item)
} else {
arroy::Writer::new(self.angular_db(), self.index, dimension).contains_item(rtxn, item)
}
}
pub fn nns_by_item(
&self,
rtxn: &RoTxn,
item: ItemId,
limit: usize,
filter: Option<&RoaringBitmap>,
) -> Result<Option<Vec<(ItemId, f32)>>, arroy::Error> {
if self.quantized {
arroy::Reader::open(rtxn, self.index, self.quantized_db())?
.nns_by_item(rtxn, item, limit, None, None, filter)
} else {
arroy::Reader::open(rtxn, self.index, self.angular_db())?
.nns_by_item(rtxn, item, limit, None, None, filter)
}
}
pub fn nns_by_vector(
&self,
txn: &RoTxn,
item: &[f32],
limit: usize,
filter: Option<&RoaringBitmap>,
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
if self.quantized {
arroy::Reader::open(txn, self.index, self.quantized_db())?
.nns_by_vector(txn, item, limit, None, None, filter)
} else {
arroy::Reader::open(txn, self.index, self.angular_db())?
.nns_by_vector(txn, item, limit, None, None, filter)
}
}
pub fn item_vector(&self, rtxn: &RoTxn, docid: u32) -> Result<Option<Vec<f32>>, arroy::Error> {
if self.quantized {
arroy::Reader::open(rtxn, self.index, self.quantized_db())?.item_vector(rtxn, docid)
} else {
arroy::Reader::open(rtxn, self.index, self.angular_db())?.item_vector(rtxn, docid)
}
}
fn angular_db(&self) -> arroy::Database<Angular> {
self.database.remap_data_type()
}
fn quantized_db(&self) -> arroy::Database<BinaryQuantizedAngular> {
self.database.remap_data_type()
}
}
/// One or multiple embeddings stored consecutively in a flat vector. /// One or multiple embeddings stored consecutively in a flat vector.
pub struct Embeddings<F> { pub struct Embeddings<F> {
data: Vec<F>, data: Vec<F>,
@ -124,62 +293,48 @@ pub struct EmbeddingConfig {
pub embedder_options: EmbedderOptions, pub embedder_options: EmbedderOptions,
/// Document template /// Document template
pub prompt: PromptData, pub prompt: PromptData,
/// If this embedder is binary quantized
pub quantized: Option<bool>,
// TODO: add metrics and anything needed // TODO: add metrics and anything needed
} }
impl EmbeddingConfig {
    /// Returns whether this embedder is configured as binary quantized.
    ///
    /// The `quantized` field is optional; an unset value (`None`) is reported as `false`.
    pub fn quantized(&self) -> bool {
        matches!(self.quantized, Some(true))
    }
}
/// Map of embedder configurations. /// Map of embedder configurations.
/// ///
/// Each configuration is mapped to a name. /// Each configuration is mapped to a name.
#[derive(Clone, Default)] #[derive(Clone, Default)]
pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>)>); pub struct EmbeddingConfigs(HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>);
impl EmbeddingConfigs { impl EmbeddingConfigs {
/// Create the map from its internal components. /// Create the map from its internal components.
pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>)>) -> Self { pub fn new(data: HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)>) -> Self {
Self(data) Self(data)
} }
/// Get an embedder configuration and template from its name. /// Get an embedder configuration and template from its name.
pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>)> { pub fn get(&self, name: &str) -> Option<(Arc<Embedder>, Arc<Prompt>, bool)> {
self.0.get(name).cloned() self.0.get(name).cloned()
} }
/// Get the default embedder configuration, if any. pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
self.get(self.get_default_embedder_name())
}
pub fn inner_as_ref(&self) -> &HashMap<String, (Arc<Embedder>, Arc<Prompt>)> {
&self.0 &self.0
} }
pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>)> { pub fn into_inner(self) -> HashMap<String, (Arc<Embedder>, Arc<Prompt>, bool)> {
self.0 self.0
} }
/// Get the name of the default embedder configuration.
///
/// The default embedder is determined as follows:
///
/// - If there is only one embedder, it is always the default.
/// - If there are multiple embedders and one of them is called `default`, then that one is the default embedder.
/// - In all other cases, there is no default embedder.
pub fn get_default_embedder_name(&self) -> &str {
let mut it = self.0.keys();
let first_name = it.next();
let second_name = it.next();
match (first_name, second_name) {
(None, _) => "default",
(Some(first), None) => first,
(Some(_), Some(_)) => "default",
}
}
} }
impl IntoIterator for EmbeddingConfigs { impl IntoIterator for EmbeddingConfigs {
type Item = (String, (Arc<Embedder>, Arc<Prompt>)); type Item = (String, (Arc<Embedder>, Arc<Prompt>, bool));
type IntoIter = std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>)>; type IntoIter =
std::collections::hash_map::IntoIter<String, (Arc<Embedder>, Arc<Prompt>, bool)>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
self.0.into_iter() self.0.into_iter()

View File

@ -32,6 +32,9 @@ pub struct EmbeddingSettings {
pub dimensions: Setting<usize>, pub dimensions: Setting<usize>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)] #[deserr(default)]
pub binary_quantized: Setting<bool>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)]
pub document_template: Setting<String>, pub document_template: Setting<String>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)] #[deserr(default)]
@ -85,23 +88,63 @@ pub enum ReindexAction {
pub enum SettingsDiff { pub enum SettingsDiff {
Remove, Remove,
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings }, Reindex { action: ReindexAction, updated_settings: EmbeddingSettings, quantize: bool },
UpdateWithoutReindex { updated_settings: EmbeddingSettings }, UpdateWithoutReindex { updated_settings: EmbeddingSettings, quantize: bool },
} }
pub enum EmbedderAction { #[derive(Default, Debug)]
WriteBackToDocuments(WriteBackToDocuments), pub struct EmbedderAction {
Reindex(ReindexAction), pub was_quantized: bool,
pub is_being_quantized: bool,
pub write_back: Option<WriteBackToDocuments>,
pub reindex: Option<ReindexAction>,
} }
impl EmbedderAction {
    /// Returns the `is_being_quantized` flag of this action.
    pub fn is_being_quantized(&self) -> bool {
        self.is_being_quantized
    }

    /// Borrows the write-back payload, if this action carries one.
    pub fn write_back(&self) -> Option<&WriteBackToDocuments> {
        self.write_back.as_ref()
    }

    /// Borrows the reindex payload, if this action carries one.
    pub fn reindex(&self) -> Option<&ReindexAction> {
        self.reindex.as_ref()
    }

    /// Consumes the action and returns it with `is_being_quantized` set to `quantize`.
    ///
    /// All other fields are carried over unchanged.
    pub fn with_is_being_quantized(self, quantize: bool) -> Self {
        Self { is_being_quantized: quantize, ..self }
    }

    /// Builds an action that only carries a write-back payload.
    ///
    /// `was_quantized` is stored as given; `is_being_quantized` starts as `false`
    /// and no reindex payload is set.
    pub fn with_write_back(write_back: WriteBackToDocuments, was_quantized: bool) -> Self {
        Self {
            write_back: Some(write_back),
            reindex: None,
            was_quantized,
            is_being_quantized: false,
        }
    }

    /// Builds an action that only carries a reindex payload.
    ///
    /// `was_quantized` is stored as given; `is_being_quantized` starts as `false`
    /// and no write-back payload is set.
    pub fn with_reindex(reindex: ReindexAction, was_quantized: bool) -> Self {
        Self {
            reindex: Some(reindex),
            write_back: None,
            was_quantized,
            is_being_quantized: false,
        }
    }
}
#[derive(Debug)]
pub struct WriteBackToDocuments { pub struct WriteBackToDocuments {
pub embedder_id: u8, pub embedder_id: u8,
pub user_provided: RoaringBitmap, pub user_provided: RoaringBitmap,
} }
impl SettingsDiff { impl SettingsDiff {
pub fn from_settings(old: EmbeddingSettings, new: Setting<EmbeddingSettings>) -> Self { pub fn from_settings(
match new { embedder_name: &str,
old: EmbeddingSettings,
new: Setting<EmbeddingSettings>,
) -> Result<Self, UserError> {
let ret = match new {
Setting::Set(new) => { Setting::Set(new) => {
let EmbeddingSettings { let EmbeddingSettings {
mut source, mut source,
@ -116,6 +159,7 @@ impl SettingsDiff {
mut distribution, mut distribution,
mut headers, mut headers,
mut document_template_max_bytes, mut document_template_max_bytes,
binary_quantized: mut binary_quantize,
} = old; } = old;
let EmbeddingSettings { let EmbeddingSettings {
@ -131,8 +175,17 @@ impl SettingsDiff {
distribution: new_distribution, distribution: new_distribution,
headers: new_headers, headers: new_headers,
document_template_max_bytes: new_document_template_max_bytes, document_template_max_bytes: new_document_template_max_bytes,
binary_quantized: new_binary_quantize,
} = new; } = new;
if matches!(binary_quantize, Setting::Set(true))
&& matches!(new_binary_quantize, Setting::Set(false))
{
return Err(UserError::InvalidDisableBinaryQuantization {
embedder_name: embedder_name.to_string(),
});
}
let mut reindex_action = None; let mut reindex_action = None;
// **Warning**: do not use short-circuiting || here, we want all these operations applied // **Warning**: do not use short-circuiting || here, we want all these operations applied
@ -172,6 +225,7 @@ impl SettingsDiff {
_ => {} _ => {}
} }
} }
let binary_quantize_changed = binary_quantize.apply(new_binary_quantize);
if url.apply(new_url) { if url.apply(new_url) {
match source { match source {
// do not regenerate on an url change in OpenAI // do not regenerate on an url change in OpenAI
@ -231,16 +285,27 @@ impl SettingsDiff {
distribution, distribution,
headers, headers,
document_template_max_bytes, document_template_max_bytes,
binary_quantized: binary_quantize,
}; };
match reindex_action { match reindex_action {
Some(action) => Self::Reindex { action, updated_settings }, Some(action) => Self::Reindex {
None => Self::UpdateWithoutReindex { updated_settings }, action,
updated_settings,
quantize: binary_quantize_changed,
},
None => Self::UpdateWithoutReindex {
updated_settings,
quantize: binary_quantize_changed,
},
} }
} }
Setting::Reset => Self::Remove, Setting::Reset => Self::Remove,
Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old }, Setting::NotSet => {
Self::UpdateWithoutReindex { updated_settings: old, quantize: false }
} }
};
Ok(ret)
} }
} }
@ -486,7 +551,7 @@ impl std::fmt::Display for EmbedderSource {
impl From<EmbeddingConfig> for EmbeddingSettings { impl From<EmbeddingConfig> for EmbeddingSettings {
fn from(value: EmbeddingConfig) -> Self { fn from(value: EmbeddingConfig) -> Self {
let EmbeddingConfig { embedder_options, prompt } = value; let EmbeddingConfig { embedder_options, prompt, quantized } = value;
let document_template_max_bytes = let document_template_max_bytes =
Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get()); Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get());
match embedder_options { match embedder_options {
@ -507,6 +572,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
response: Setting::NotSet, response: Setting::NotSet,
headers: Setting::NotSet, headers: Setting::NotSet,
distribution: Setting::some_or_not_set(distribution), distribution: Setting::some_or_not_set(distribution),
binary_quantized: Setting::some_or_not_set(quantized),
}, },
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
url, url,
@ -527,6 +593,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
response: Setting::NotSet, response: Setting::NotSet,
headers: Setting::NotSet, headers: Setting::NotSet,
distribution: Setting::some_or_not_set(distribution), distribution: Setting::some_or_not_set(distribution),
binary_quantized: Setting::some_or_not_set(quantized),
}, },
super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions {
embedding_model, embedding_model,
@ -547,6 +614,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
response: Setting::NotSet, response: Setting::NotSet,
headers: Setting::NotSet, headers: Setting::NotSet,
distribution: Setting::some_or_not_set(distribution), distribution: Setting::some_or_not_set(distribution),
binary_quantized: Setting::some_or_not_set(quantized),
}, },
super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
dimensions, dimensions,
@ -564,6 +632,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
response: Setting::NotSet, response: Setting::NotSet,
headers: Setting::NotSet, headers: Setting::NotSet,
distribution: Setting::some_or_not_set(distribution), distribution: Setting::some_or_not_set(distribution),
binary_quantized: Setting::some_or_not_set(quantized),
}, },
super::EmbedderOptions::Rest(super::rest::EmbedderOptions { super::EmbedderOptions::Rest(super::rest::EmbedderOptions {
api_key, api_key,
@ -586,6 +655,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
response: Setting::Set(response), response: Setting::Set(response),
distribution: Setting::some_or_not_set(distribution), distribution: Setting::some_or_not_set(distribution),
headers: Setting::Set(headers), headers: Setting::Set(headers),
binary_quantized: Setting::some_or_not_set(quantized),
}, },
} }
} }
@ -607,8 +677,11 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
response, response,
distribution, distribution,
headers, headers,
binary_quantized,
} = value; } = value;
this.quantized = binary_quantized.set();
if let Some(source) = source.set() { if let Some(source) = source.set() {
match source { match source {
EmbedderSource::OpenAi => { EmbedderSource::OpenAi => {