diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs
index 14908120c..3cccb5a69 100644
--- a/index-scheduler/src/index_mapper/mod.rs
+++ b/index-scheduler/src/index_mapper/mod.rs
@@ -108,8 +108,10 @@ pub struct IndexStats {
/// Association of every field name with the number of times it occurs in the documents.
pub field_distribution: FieldDistribution,
/// Creation date of the index.
+ #[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
/// Date of the last update of the index.
+ #[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
}
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index e881734fb..2e70b4eb7 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -72,6 +72,19 @@ fn on_panic(info: &std::panic::PanicInfo) {
#[actix_web::main]
async fn main() -> anyhow::Result<()> {
+ try_main().await.inspect_err(|error| {
+ tracing::error!(%error);
+ let mut current = error.source();
+ let mut depth = 0;
+ while let Some(source) = current {
+ tracing::info!(%source, depth, "Error caused by");
+ current = source.source();
+ depth += 1;
+ }
+ })
+}
+
+async fn try_main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
std::panic::set_hook(Box::new(on_panic));
diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs
index 6f081f1c7..e95a75f69 100644
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -682,6 +682,7 @@ generate_configure!(
filterable_attributes,
sortable_attributes,
displayed_attributes,
+ localized_attributes,
searchable_attributes,
distinct_attribute,
proximity_precision,
diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs
index dada9159b..915505be0 100644
--- a/meilisearch/src/search/mod.rs
+++ b/meilisearch/src/search/mod.rs
@@ -1369,12 +1369,18 @@ pub fn perform_facet_search(
None => TimeBudget::default(),
};
+ // In the faceted search context, we want to use the intersection between the locales provided by the user
+ // and the locales of the facet string.
+ // If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.
+ // If the user does not provide locales, we use the locales of the facet string.
let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
- let locales = locales.or_else(|| {
- localized_attributes
+ let localized_attributes_locales =
+ localized_attributes.into_iter().find(|attr| attr.match_str(&facet_name));
+ let locales = localized_attributes_locales.map(|attr| {
+ attr.locales
.into_iter()
- .find(|attr| attr.match_str(&facet_name))
- .map(|attr| attr.locales)
+ .filter(|locale| locales.as_ref().map_or(true, |locales| locales.contains(locale)))
+ .collect()
});
let (search, _, _, _) =
diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs
index 9f1c22b75..dbc4fcc30 100644
--- a/meilisearch/tests/search/locales.rs
+++ b/meilisearch/tests/search/locales.rs
@@ -386,12 +386,39 @@ async fn force_locales() {
|response, code| {
snapshot!(response, @r###"
{
- "hits": [],
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
"query": "\"进击的巨人\"",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
- "estimatedTotalHits": 0
+ "estimatedTotalHits": 1
}
"###);
snapshot!(code, @"200 OK");
@@ -483,12 +510,39 @@ async fn force_locales_with_pattern() {
|response, code| {
snapshot!(response, @r###"
{
- "hits": [],
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
"query": "\"进击的巨人\"",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
- "estimatedTotalHits": 0
+ "estimatedTotalHits": 1
}
"###);
snapshot!(code, @"200 OK");
@@ -761,6 +815,275 @@ async fn force_different_locales_with_pattern() {
.await;
}
+#[actix_rt::test]
+async fn auto_infer_locales_at_search_with_attributes_to_search_on() {
+ let server = Server::new().await;
+
+ let index = server.index("test");
+ let documents = DOCUMENTS.clone();
+ let (response, _) = index
+ .update_settings(
+ json!({
+ "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"],
+ "localizedAttributes": [
+ // force japanese
+ {"attributePatterns": ["*_zh"], "locales": ["jpn"]},
+ // force chinese
+ {"attributePatterns": ["*_ja"], "locales": ["cmn"]},
+ // any language
+ {"attributePatterns": ["*_en"], "locales": []}
+ ]
+ }),
+ )
+ .await;
+ snapshot!(response, @r###"
+ {
+ "taskUid": 0,
+ "indexUid": "test",
+ "status": "enqueued",
+ "type": "settingsUpdate",
+ "enqueuedAt": "[date]"
+ }
+ "###);
+ index.add_documents(documents, None).await;
+ index.wait_task(1).await;
+
+ // auto infer any language
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 0
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+
+ // should infer chinese
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"], "attributesToSearchOn": ["name_zh", "description_zh"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 1
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+}
+
+#[actix_rt::test]
+async fn auto_infer_locales_at_search() {
+ let server = Server::new().await;
+
+ let index = server.index("test");
+ let documents = DOCUMENTS.clone();
+ let (response, _) = index
+ .update_settings(
+ json!({
+ "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"],
+ "localizedAttributes": [
+ // force japanese
+ {"attributePatterns": ["*"], "locales": ["jpn"]},
+ ]
+ }),
+ )
+ .await;
+ snapshot!(response, @r###"
+ {
+ "taskUid": 0,
+ "indexUid": "test",
+ "status": "enqueued",
+ "type": "settingsUpdate",
+ "enqueuedAt": "[date]"
+ }
+ "###);
+ index.add_documents(documents, None).await;
+ index.wait_task(1).await;
+
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 1
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 1
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+
+ index
+ .search(
+ json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}),
+ |response, code| {
+ snapshot!(response, @r###"
+ {
+ "hits": [
+ {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": 853,
+ "_vectors": {
+ "manual": [
+ 1.0,
+ 2.0,
+ 3.0
+ ]
+ },
+ "_formatted": {
+ "name_zh": "进击的巨人",
+ "author_zh": "諫山創",
+ "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。",
+ "id": "853",
+ "_vectors": {
+ "manual": [
+ "1.0",
+ "2.0",
+ "3.0"
+ ]
+ }
+ }
+ }
+ ],
+ "query": "\"进击的巨人\"",
+ "processingTimeMs": "[duration]",
+ "limit": 20,
+ "offset": 0,
+ "estimatedTotalHits": 1
+ }
+ "###);
+ snapshot!(code, @"200 OK");
+ },
+ )
+ .await;
+}
+
#[actix_rt::test]
async fn force_different_locales_with_pattern_nested() {
let server = Server::new().await;
diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs
index 3c406cd5f..974025652 100644
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -7,6 +7,7 @@ mod facet_search;
mod formatted;
mod geo;
mod hybrid;
+#[cfg(not(feature = "chinese-pinyin"))]
mod locales;
mod matching_strategy;
mod multi;
@@ -169,6 +170,7 @@ async fn negative_special_cases_search() {
}
#[cfg(feature = "default")]
+#[cfg(not(feature = "chinese-pinyin"))]
#[actix_rt::test]
async fn test_kanji_language_detection() {
let server = Server::new().await;
diff --git a/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap b/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap
index 903e96ffb..18532cba4 100644
--- a/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap
+++ b/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap
@@ -2,7 +2,7 @@
source: meilisearch/tests/search/errors.rs
---
{
- "uid": 0,
+ "uid": "[uid]",
"indexUid": "tamo",
"status": "succeeded",
"type": "indexCreation",
diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs
index 58805d54f..1571b8ca6 100644
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -9,6 +9,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
let mut map = HashMap::new();
map.insert("displayed_attributes", json!(["*"]));
map.insert("searchable_attributes", json!(["*"]));
+ map.insert("localized_attributes", json!(null));
map.insert("filterable_attributes", json!([]));
map.insert("distinct_attribute", json!(null));
map.insert(
@@ -409,6 +410,7 @@ macro_rules! test_setting_routes {
test_setting_routes!(
filterable_attributes put,
displayed_attributes put,
+ localized_attributes put,
searchable_attributes put,
distinct_attribute put,
stop_words put,
diff --git a/meilisearch/tests/vector/intel_gen.txt.gz b/meilisearch/tests/vector/intel_gen.txt.gz
new file mode 100644
index 000000000..115eafea5
Binary files /dev/null and b/meilisearch/tests/vector/intel_gen.txt.gz differ
diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs
index 66f1f87e7..7c9b375d9 100644
--- a/meilisearch/tests/vector/mod.rs
+++ b/meilisearch/tests/vector/mod.rs
@@ -1,3 +1,4 @@
+mod openai;
mod rest;
mod settings;
@@ -10,6 +11,22 @@ use crate::common::index::Index;
use crate::common::{default_settings, GetAllDocumentsOptions, Server};
use crate::json;
+async fn get_server_vector() -> Server {
+ let server = Server::new().await;
+ let (value, code) = server.set_features(json!({"vectorStore": true})).await;
+ snapshot!(code, @"200 OK");
+ snapshot!(value, @r###"
+ {
+ "vectorStore": true,
+ "metrics": false,
+ "logsRoute": false,
+ "editDocumentsByFunction": false,
+ "containsFilter": false
+ }
+ "###);
+ server
+}
+
#[actix_rt::test]
async fn add_remove_user_provided() {
let server = Server::new().await;
diff --git a/meilisearch/tests/vector/openai.rs b/meilisearch/tests/vector/openai.rs
new file mode 100644
index 000000000..f350abbe1
--- /dev/null
+++ b/meilisearch/tests/vector/openai.rs
@@ -0,0 +1,1873 @@
+use std::collections::BTreeMap;
+use std::io::Write;
+use std::sync::atomic::{AtomicU32, Ordering};
+use std::sync::OnceLock;
+
+use meili_snap::{json_string, snapshot};
+use wiremock::matchers::{method, path};
+use wiremock::{Mock, MockServer, Request, ResponseTemplate};
+
+use crate::common::{GetAllDocumentsOptions, Value};
+use crate::json;
+use crate::vector::get_server_vector;
+
+#[derive(serde::Deserialize)]
+struct OpenAiResponses(BTreeMap<String, OpenAiResponse>);
+
+#[derive(serde::Deserialize)]
+struct OpenAiResponse {
+ large: Option<Vec<f32>>,
+ small: Option<Vec<f32>>,
+ ada: Option<Vec<f32>>,
+ large_512: Option<Vec<f32>>,
+}
+
+#[derive(serde::Deserialize)]
+struct OpenAiTokenizedResponses {
+ tokens: Vec<u64>,
+ embedding: Vec<f32>,
+}
+
+impl OpenAiResponses {
+ fn get(&self, text: &str, model_dimensions: ModelDimensions) -> Option<&[f32]> {
+ let entry = self.0.get(text)?;
+ match model_dimensions {
+ ModelDimensions::Large => entry.large.as_deref(),
+ ModelDimensions::Small => entry.small.as_deref(),
+ ModelDimensions::Ada => entry.ada.as_deref(),
+ ModelDimensions::Large512 => entry.large_512.as_deref(),
+ }
+ }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum ModelDimensions {
+ Large,
+ Small,
+ Ada,
+ Large512,
+}
+
+impl ModelDimensions {
+ fn add_to_settings(&self, settings: &mut Value) {
+ settings["model"] = serde_json::json!(self.model());
+ if let ModelDimensions::Large512 = self {
+ settings["dimensions"] = serde_json::json!(512);
+ }
+ }
+
+ fn model(&self) -> &'static str {
+ match self {
+ ModelDimensions::Large | ModelDimensions::Large512 => "text-embedding-3-large",
+ ModelDimensions::Small => "text-embedding-3-small",
+ ModelDimensions::Ada => "text-embedding-ada-002",
+ }
+ }
+
+ fn from_request(request: &serde_json::Value) -> Self {
+ let has_dimensions_512 = if let Some(dimensions) = request.get("dimensions") {
+ if dimensions != 512 {
+ panic!("unsupported dimensions values")
+ }
+ true
+ } else {
+ false
+ };
+ let serde_json::Value::String(model) = &request["model"] else {
+ panic!("unsupported non string model")
+ };
+ match (model.as_str(), has_dimensions_512) {
+ ("text-embedding-3-large", true) => Self::Large512,
+ (_, true) => panic!("unsupported dimensions with non-large model"),
+ ("text-embedding-3-large", false) => Self::Large,
+ ("text-embedding-3-small", false) => Self::Small,
+ ("text-embedding-ada-002", false) => Self::Ada,
+ (_, false) => panic!("unsupported model"),
+ }
+ }
+}
+
+fn openai_responses() -> &'static OpenAiResponses {
+ static OPENAI_RESPONSES: OnceLock<OpenAiResponses> = OnceLock::new();
+ OPENAI_RESPONSES.get_or_init(|| {
+ // json file that was compressed with gzip
+ // decompress with `gzip --keep -d openai_responses.json.gz`
+ // recompress with `gzip --keep -c openai_responses.json > openai_responses.json.gz`
+ let compressed_responses = include_bytes!("openai_responses.json.gz");
+ let mut responses = Vec::new();
+ let mut decoder = flate2::write::GzDecoder::new(&mut responses);
+
+ decoder.write_all(compressed_responses).unwrap();
+ drop(decoder);
+ serde_json::from_slice(&responses).unwrap()
+ })
+}
+
+fn openai_tokenized_responses() -> &'static OpenAiTokenizedResponses {
+ static OPENAI_TOKENIZED_RESPONSES: OnceLock<OpenAiTokenizedResponses> = OnceLock::new();
+ OPENAI_TOKENIZED_RESPONSES.get_or_init(|| {
+ // json file that was compressed with gzip
+ // decompress with `gzip --keep -d openai_tokenized_responses.json.gz`
+ // recompress with `gzip --keep -c openai_tokenized_responses.json > openai_tokenized_responses.json.gz`
+ let compressed_responses = include_bytes!("openai_tokenized_responses.json.gz");
+ let mut responses = Vec::new();
+ let mut decoder = flate2::write::GzDecoder::new(&mut responses);
+
+ decoder.write_all(compressed_responses).unwrap();
+ drop(decoder);
+ serde_json::from_slice(&responses).unwrap()
+ })
+}
+
+fn long_text() -> &'static str {
+ static LONG_TEXT: OnceLock<String> = OnceLock::new();
+ LONG_TEXT.get_or_init(|| {
+ // decompress with `gzip --keep -d intel_gen.txt.gz`
+ // recompress with `gzip --keep -c intel_gen.txt > intel_gen.txt.gz`
+ let compressed_long_text = include_bytes!("intel_gen.txt.gz");
+ let mut long_text = Vec::new();
+ let mut decoder = flate2::write::GzDecoder::new(&mut long_text);
+
+ decoder.write_all(compressed_long_text).unwrap();
+ drop(decoder);
+ let long_text = std::str::from_utf8(&long_text).unwrap();
+
+ long_text.repeat(3)
+ })
+}
+
+async fn create_mock_tokenized() -> (MockServer, Value) {
+ create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false).await
+}
+
+async fn create_mock_with_template(
+ document_template: &str,
+ model_dimensions: ModelDimensions,
+ fallible: bool,
+) -> (MockServer, Value) {
+ let mock_server = MockServer::start().await;
+ const API_KEY: &str = "my-api-key";
+ const API_KEY_BEARER: &str = "Bearer my-api-key";
+
+ let attempt = AtomicU32::new(0);
+
+ Mock::given(method("POST"))
+ .and(path("/"))
+ .respond_with(move |req: &Request| {
+ // 0. maybe return 500
+ if fallible {
+ let attempt = attempt.fetch_add(1, Ordering::Relaxed);
+ let failed = matches!(attempt % 4, 0 | 1 | 3);
+ if failed {
+ return ResponseTemplate::new(503).set_body_json(json!({
+ "error": {
+ "message": "come back later",
+ "type": "come_back_later"
+ }
+ }))
+ }
+ }
+ // 1. check API key
+ match req.headers.get("Authorization") {
+ Some(api_key) if api_key == API_KEY_BEARER => {
+ {}
+ }
+ Some(api_key) => {
+ let api_key = api_key.to_str().unwrap();
+ return ResponseTemplate::new(401).set_body_json(
+ json!(
+ {
+ "error": {
+ "message": format!("Incorrect API key provided: {api_key}. You can find your API key at https://platform.openai.com/account/api-keys."),
+ "type": "invalid_request_error",
+ "param": serde_json::Value::Null,
+ "code": "invalid_api_key"
+ }
+ }
+ ),
+ )
+ }
+ None => {
+ return ResponseTemplate::new(401).set_body_json(
+ json!(
+ {
+ "error": {
+ "message": "You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.",
+ "type": "invalid_request_error",
+ "param": serde_json::Value::Null,
+ "code": serde_json::Value::Null
+ }
+ }
+ ),
+ )
+ }
+ }
+ // 2. parse text inputs
+ let query: serde_json::Value = match req.body_json() {
+ Ok(query) => query,
+ Err(_error) => return ResponseTemplate::new(400).set_body_json(
+ json!(
+ {
+ "error": {
+ "message": "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)",
+ "type": "invalid_request_error",
+ "param": serde_json::Value::Null,
+ "code": serde_json::Value::Null
+ }
+ }
+ )
+ )
+ };
+ let query_model_dimensions = ModelDimensions::from_request(&query);
+ if query_model_dimensions != model_dimensions {
+ panic!("Expected {model_dimensions:?}, got {query_model_dimensions:?}")
+ }
+
+ // 3. for each text, find embedding in responses
+ let serde_json::Value::Array(inputs) = &query["input"] else {
+ panic!("Unexpected `input` value")
+ };
+
+ let openai_tokenized_responses = openai_tokenized_responses();
+ let embeddings = if inputs == openai_tokenized_responses.tokens.as_slice() {
+ vec![openai_tokenized_responses.embedding.clone()]
+ } else {
+ let mut embeddings = Vec::new();
+ for input in inputs {
+ let serde_json::Value::String(input) = input else {
+ return ResponseTemplate::new(400).set_body_json(json!({
+ "error": {
+ "message": "Unexpected `input` value",
+ "type": "test_response",
+ "query": query
+ }
+ }))
+ };
+
+ if input == long_text() {
+ return ResponseTemplate::new(400).set_body_json(json!(
+ {
+ "error": {
+ "message": "This model's maximum context length is 8192 tokens, however you requested 10554 tokens (10554 in your prompt; 0 for the completion). Please reduce your prompt; or completion length.",
+ "type": "invalid_request_error",
+ "param": null,
+ "code": null,
+ }
+ }
+ ));
+ }
+
+ let Some(embedding) = openai_responses().get(input, model_dimensions) else {
+ return ResponseTemplate::new(404).set_body_json(json!(
+ {
+ "error": {
+ "message": "Could not find embedding for text",
+ "text": input,
+ "model_dimensions": format!("{model_dimensions:?}"),
+ "type": "add_to_openai_responses_json_please",
+ "query": query,
+ }
+ }
+ ))
+ };
+
+ embeddings.push(embedding.to_vec());
+ }
+ embeddings
+ };
+
+
+ let data : Vec<_> = embeddings.into_iter().enumerate().map(|(index, embedding)| json!({
+ "object": "embedding",
+ "index": index,
+ "embedding": embedding,
+ })).collect();
+
+ // 4. produce output from embeddings
+ ResponseTemplate::new(200).set_body_json(json!({
+ "object": "list",
+ "data": data,
+ "model": model_dimensions.model(),
+ "usage": {
+ "prompt_tokens": "[prompt_tokens]",
+ "total_tokens": "[total_tokens]"
+ }
+ }))
+ })
+ .mount(&mock_server)
+ .await;
+ let url = mock_server.uri();
+
+ let mut embedder_settings = json!({
+ "source": "openAi",
+ "url": url,
+ "apiKey": API_KEY,
+ "documentTemplate": document_template
+ });
+
+ model_dimensions.add_to_settings(&mut embedder_settings);
+
+ (mock_server, embedder_settings)
+}
+
+const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}
+ {%- else -%}
+ Un chien nommé {{doc.name}}, né en {{doc.birthyear}}
+ {%- endif %}, de race {{doc.breed}}."#;
+
+async fn create_mock() -> (MockServer, Value) {
+ create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false).await
+}
+
+async fn create_mock_dimensions() -> (MockServer, Value) {
+ create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false).await
+}
+
+async fn create_mock_small_embedding_model() -> (MockServer, Value) {
+ create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false).await
+}
+
+async fn create_mock_legacy_embedding_model() -> (MockServer, Value) {
+ create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false).await
+}
+
+async fn create_fallible_mock() -> (MockServer, Value) {
+ create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true).await
+}
+
+// basic test "it works"
+#[actix_rt::test]
+async fn it_works() {
+ let (_mock, setting) = create_mock().await;
+ let server = get_server_vector().await;
+ let index = server.index("doggo");
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"202 Accepted");
+ let task = server.wait_task(response.uid()).await;
+ snapshot!(task["status"], @r###""succeeded""###);
+ let documents = json!([
+ {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
+ {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
+ {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
+ {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
+ ]);
+ let (value, code) = index.add_documents(documents, None).await;
+ snapshot!(code, @"202 Accepted");
+ let task = index.wait_task(value.uid()).await;
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "succeeded",
+ "type": "documentAdditionOrUpdate",
+ "canceledBy": null,
+ "details": {
+ "receivedDocuments": 4,
+ "indexedDocuments": 4
+ },
+ "error": null,
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ let (documents, _code) = index
+ .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+ .await;
+ snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###"
+ {
+ "results": [
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ }
+ ],
+ "offset": 0,
+ "limit": 20,
+ "total": 4
+ }
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "chien de chasse",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "petit chien",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "grand chien de berger des montagnes",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ }
+ ]
+ "###);
+}
+
+// tokenize long text
+
+// basic test "it works"
+#[actix_rt::test]
+async fn tokenize_long_text() {
+ let (_mock, setting) = create_mock_tokenized().await;
+ let server = get_server_vector().await;
+ let index = server.index("doggo");
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"202 Accepted");
+ let task = server.wait_task(response.uid()).await;
+ snapshot!(task["status"], @r###""succeeded""###);
+ let documents = json!([
+ {"id": 0, "text": long_text()}
+ ]);
+ let (value, code) = index.add_documents(documents, None).await;
+ snapshot!(code, @"202 Accepted");
+ let task = index.wait_task(value.uid()).await;
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "succeeded",
+ "type": "documentAdditionOrUpdate",
+ "canceledBy": null,
+ "details": {
+ "receivedDocuments": 1,
+ "indexedDocuments": 1
+ },
+ "error": null,
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "grand chien de berger des montagnes",
+ "showRankingScore": true,
+ "attributesToRetrieve": ["id"],
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 0,
+ "_rankingScore": 0.07944583892822266
+ }
+ ]
+ "###);
+}
+
+// "wrong parameters"
+
+#[actix_rt::test]
+async fn bad_api_key() {
+ let (_mock, mut setting) = create_mock().await;
+ let server = get_server_vector().await;
+ let index = server.index("doggo");
+
+ let documents = json!([
+ {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
+ {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
+ {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
+ {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
+ ]);
+ let (value, code) = index.add_documents(documents, None).await;
+ snapshot!(code, @"202 Accepted");
+ let task = index.wait_task(value.uid()).await;
+
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "succeeded",
+ "type": "documentAdditionOrUpdate",
+ "canceledBy": null,
+ "details": {
+ "receivedDocuments": 4,
+ "indexedDocuments": 4
+ },
+ "error": null,
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ // wrong API key
+ setting["apiKey"] = "doggo".into();
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"202 Accepted");
+ let task = server.wait_task(response.uid()).await;
+
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "failed",
+ "type": "settingsUpdate",
+ "canceledBy": null,
+ "details": {
+ "embedders": {
+ "default": {
+ "source": "openAi",
+ "model": "text-embedding-3-large",
+ "apiKey": "XXX...",
+ "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
+ "url": "[url]"
+ }
+ }
+ },
+ "error": {
+ "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
+ "code": "vector_embedding_error",
+ "type": "invalid_request",
+ "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
+ },
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ // no API key
+ setting.as_object_mut().unwrap().remove("apiKey");
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"202 Accepted");
+ let task = server.wait_task(response.uid()).await;
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "failed",
+ "type": "settingsUpdate",
+ "canceledBy": null,
+ "details": {
+ "embedders": {
+ "default": {
+ "source": "openAi",
+ "model": "text-embedding-3-large",
+ "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
+ "url": "[url]"
+ }
+ }
+ },
+ "error": {
+ "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
+ "code": "vector_embedding_error",
+ "type": "invalid_request",
+ "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
+ },
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ // not a string API key
+ setting["apiKey"] = 42.into();
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"400 Bad Request");
+ snapshot!(response, @r###"
+ {
+ "message": "Invalid value type at `.embedders.default.apiKey`: expected a string, but found a positive integer: `42`",
+ "code": "invalid_settings_embedders",
+ "type": "invalid_request",
+ "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
+ }
+ "###);
+}
+
+// one test with wrong model
+#[actix_rt::test]
+async fn bad_model() {
+ let (_mock, mut setting) = create_mock().await;
+ let server = get_server_vector().await;
+ let index = server.index("doggo");
+
+ let documents = json!([
+ {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
+ {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
+ {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
+ {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
+ ]);
+ let (value, code) = index.add_documents(documents, None).await;
+ snapshot!(code, @"202 Accepted");
+ let task = index.wait_task(value.uid()).await;
+
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "succeeded",
+ "type": "documentAdditionOrUpdate",
+ "canceledBy": null,
+ "details": {
+ "receivedDocuments": 4,
+ "indexedDocuments": 4
+ },
+ "error": null,
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ // wrong model
+ setting["model"] = "doggo".into();
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"400 Bad Request");
+
+ snapshot!(response, @r###"
+ {
+ "message": "`.embedders.default.model`: Invalid model `doggo` for OpenAI. Supported models: [\"text-embedding-ada-002\", \"text-embedding-3-small\", \"text-embedding-3-large\"]",
+ "code": "invalid_settings_embedders",
+ "type": "invalid_request",
+ "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
+ }
+ "###);
+
+ // not a string model
+ setting["model"] = 42.into();
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"400 Bad Request");
+ snapshot!(response, @r###"
+ {
+ "message": "Invalid value type at `.embedders.default.model`: expected a string, but found a positive integer: `42`",
+ "code": "invalid_settings_embedders",
+ "type": "invalid_request",
+ "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
+ }
+ "###);
+}
+
+#[actix_rt::test]
+async fn bad_dimensions() {
+ let (_mock, mut setting) = create_mock().await;
+ let server = get_server_vector().await;
+ let index = server.index("doggo");
+
+ let documents = json!([
+ {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
+ {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
+ {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
+ {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
+ ]);
+ let (value, code) = index.add_documents(documents, None).await;
+ snapshot!(code, @"202 Accepted");
+ let task = index.wait_task(value.uid()).await;
+
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "succeeded",
+ "type": "documentAdditionOrUpdate",
+ "canceledBy": null,
+ "details": {
+ "receivedDocuments": 4,
+ "indexedDocuments": 4
+ },
+ "error": null,
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+    // zero dimensions (rejected: `dimensions` cannot be zero)
+ setting["dimensions"] = 0.into();
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"400 Bad Request");
+
+ snapshot!(response, @r###"
+ {
+ "message": "`.embedders.default.dimensions`: `dimensions` cannot be zero",
+ "code": "invalid_settings_embedders",
+ "type": "invalid_request",
+ "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
+ }
+ "###);
+
+ // negative dimensions
+ setting["dimensions"] = (-42).into();
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"400 Bad Request");
+ snapshot!(response, @r###"
+ {
+ "message": "Invalid value type at `.embedders.default.dimensions`: expected a positive integer, but found a negative integer: `-42`",
+ "code": "invalid_settings_embedders",
+ "type": "invalid_request",
+ "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
+ }
+ "###);
+
+ // huge dimensions
+ setting["dimensions"] = (42_000_000).into();
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"400 Bad Request");
+ snapshot!(response, @r###"
+ {
+ "message": "`.embedders.default.dimensions`: Model `text-embedding-3-large` does not support overriding its dimensions to a value higher than 3072. Found 42000000",
+ "code": "invalid_settings_embedders",
+ "type": "invalid_request",
+ "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders"
+ }
+ "###);
+}
+
+// one test with changed dimensions
+#[actix_rt::test]
+async fn smaller_dimensions() {
+ let (_mock, setting) = create_mock_dimensions().await;
+ let server = get_server_vector().await;
+ let index = server.index("doggo");
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"202 Accepted");
+ let task = server.wait_task(response.uid()).await;
+ snapshot!(task["status"], @r###""succeeded""###);
+ let documents = json!([
+ {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
+ {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
+ {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
+ {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
+ ]);
+ let (value, code) = index.add_documents(documents, None).await;
+ snapshot!(code, @"202 Accepted");
+ let task = index.wait_task(value.uid()).await;
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "succeeded",
+ "type": "documentAdditionOrUpdate",
+ "canceledBy": null,
+ "details": {
+ "receivedDocuments": 4,
+ "indexedDocuments": 4
+ },
+ "error": null,
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ let (documents, _code) = index
+ .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+ .await;
+ snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###"
+ {
+ "results": [
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ }
+ ],
+ "offset": 0,
+ "limit": 20,
+ "total": 4
+ }
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "chien de chasse",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "petit chien",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "grand chien de berger des montagnes",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ }
+ ]
+ "###);
+}
+
+// one test with a different (smaller) embedding model
+#[actix_rt::test]
+async fn small_embedding_model() {
+ let (_mock, setting) = create_mock_small_embedding_model().await;
+ let server = get_server_vector().await;
+ let index = server.index("doggo");
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"202 Accepted");
+ let task = server.wait_task(response.uid()).await;
+ snapshot!(task["status"], @r###""succeeded""###);
+ let documents = json!([
+ {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
+ {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
+ {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
+ {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
+ ]);
+ let (value, code) = index.add_documents(documents, None).await;
+ snapshot!(code, @"202 Accepted");
+ let task = index.wait_task(value.uid()).await;
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "succeeded",
+ "type": "documentAdditionOrUpdate",
+ "canceledBy": null,
+ "details": {
+ "receivedDocuments": 4,
+ "indexedDocuments": 4
+ },
+ "error": null,
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ let (documents, _code) = index
+ .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+ .await;
+ snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###"
+ {
+ "results": [
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ }
+ ],
+ "offset": 0,
+ "limit": 20,
+ "total": 4
+ }
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "chien de chasse",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "petit chien",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "grand chien de berger des montagnes",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ }
+ ]
+ "###);
+}
+
+#[actix_rt::test]
+async fn legacy_embedding_model() {
+ let (_mock, setting) = create_mock_legacy_embedding_model().await;
+ let server = get_server_vector().await;
+ let index = server.index("doggo");
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"202 Accepted");
+ let task = server.wait_task(response.uid()).await;
+ snapshot!(task["status"], @r###""succeeded""###);
+ let documents = json!([
+ {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
+ {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
+ {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
+ {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
+ ]);
+ let (value, code) = index.add_documents(documents, None).await;
+ snapshot!(code, @"202 Accepted");
+ let task = index.wait_task(value.uid()).await;
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "succeeded",
+ "type": "documentAdditionOrUpdate",
+ "canceledBy": null,
+ "details": {
+ "receivedDocuments": 4,
+ "indexedDocuments": 4
+ },
+ "error": null,
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ let (documents, _code) = index
+ .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+ .await;
+ snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###"
+ {
+ "results": [
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ }
+ ],
+ "offset": 0,
+ "limit": 20,
+ "total": 4
+ }
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "chien de chasse",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "petit chien",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "grand chien de berger des montagnes",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ }
+ ]
+ "###);
+}
+
+// test with a server that responds 500 on 3 out of 4 calls
+#[actix_rt::test]
+async fn it_still_works() {
+ let (_mock, setting) = create_fallible_mock().await;
+ let server = get_server_vector().await;
+ let index = server.index("doggo");
+
+ let (response, code) = index
+ .update_settings(json!({
+ "embedders": {
+ "default": setting,
+ },
+ }))
+ .await;
+ snapshot!(code, @"202 Accepted");
+ let task = server.wait_task(response.uid()).await;
+ snapshot!(task["status"], @r###""succeeded""###);
+ let documents = json!([
+ {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
+ {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"},
+ {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"},
+ {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"},
+ ]);
+ let (value, code) = index.add_documents(documents, None).await;
+ snapshot!(code, @"202 Accepted");
+ let task = index.wait_task(value.uid()).await;
+ snapshot!(task, @r###"
+ {
+ "uid": "[uid]",
+ "indexUid": "doggo",
+ "status": "succeeded",
+ "type": "documentAdditionOrUpdate",
+ "canceledBy": null,
+ "details": {
+ "receivedDocuments": 4,
+ "indexedDocuments": 4
+ },
+ "error": null,
+ "duration": "[duration]",
+ "enqueuedAt": "[date]",
+ "startedAt": "[date]",
+ "finishedAt": "[date]"
+ }
+ "###);
+
+ let (documents, _code) = index
+ .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
+ .await;
+ snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###"
+ {
+ "results": [
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever",
+ "_vectors": {
+ "default": {
+ "embeddings": "[vector]",
+ "regenerate": true
+ }
+ }
+ }
+ ],
+ "offset": 0,
+ "limit": 20,
+ "total": 4
+ }
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "chien de chasse",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "petit chien",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ },
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ }
+ ]
+ "###);
+
+ let (response, code) = index
+ .search_post(json!({
+ "q": "grand chien de berger des montagnes",
+ "hybrid": {"semanticRatio": 1.0}
+ }))
+ .await;
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 0,
+ "name": "kefir",
+ "gender": "M",
+ "birthyear": 2023,
+ "breed": "Patou"
+ },
+ {
+ "id": 1,
+ "name": "Intel",
+ "gender": "M",
+ "birthyear": 2011,
+ "breed": "Beagle"
+ },
+ {
+ "id": 3,
+ "name": "Max",
+ "gender": "M",
+ "birthyear": 1995,
+ "breed": "Labrador Retriever"
+ },
+ {
+ "id": 2,
+ "name": "Vénus",
+ "gender": "F",
+ "birthyear": 2003,
+ "breed": "Jack Russel Terrier"
+ }
+ ]
+ "###);
+}
+// test with a server that wrongly responds 400
diff --git a/meilisearch/tests/vector/openai_responses.json.gz b/meilisearch/tests/vector/openai_responses.json.gz
new file mode 100644
index 000000000..2d27822fe
Binary files /dev/null and b/meilisearch/tests/vector/openai_responses.json.gz differ
diff --git a/meilisearch/tests/vector/openai_tokenized_responses.json.gz b/meilisearch/tests/vector/openai_tokenized_responses.json.gz
new file mode 100644
index 000000000..0c708448c
Binary files /dev/null and b/meilisearch/tests/vector/openai_tokenized_responses.json.gz differ
diff --git a/meilisearch/tests/vector/rest.rs b/meilisearch/tests/vector/rest.rs
index 317ca8676..1a64eeb78 100644
--- a/meilisearch/tests/vector/rest.rs
+++ b/meilisearch/tests/vector/rest.rs
@@ -5,9 +5,9 @@ use reqwest::IntoUrl;
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, Request, ResponseTemplate};
-use crate::common::{Server, Value};
+use crate::common::Value;
use crate::json;
-use crate::vector::GetAllDocumentsOptions;
+use crate::vector::{get_server_vector, GetAllDocumentsOptions};
async fn create_mock() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
@@ -265,22 +265,6 @@ async fn dummy_testing_the_mock() {
snapshot!(body, @r###"{"data":[4,4,4]}"###);
}
-async fn get_server_vector() -> Server {
- let server = Server::new().await;
- let (value, code) = server.set_features(json!({"vectorStore": true})).await;
- snapshot!(code, @"200 OK");
- snapshot!(value, @r###"
- {
- "vectorStore": true,
- "metrics": false,
- "logsRoute": false,
- "editDocumentsByFunction": false,
- "containsFilter": false
- }
- "###);
- server
-}
-
#[actix_rt::test]
async fn bad_request() {
let (mock, _setting) = create_mock().await;
@@ -1816,7 +1800,7 @@ async fn server_custom_header() {
}
},
"error": {
- "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`",
+ "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@@ -1858,7 +1842,7 @@ async fn server_custom_header() {
}
},
"error": {
- "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`",
+ "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
diff --git a/milli/src/index.rs b/milli/src/index.rs
index 3a2f3169c..512e911aa 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -9,7 +9,6 @@ use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use roaring::RoaringBitmap;
use rstar::RTree;
use serde::{Deserialize, Serialize};
-use time::OffsetDateTime;
use crate::documents::PrimaryKey;
use crate::error::{InternalError, UserError};
@@ -173,8 +172,8 @@ impl Index {
pub fn new_with_creation_dates>(
mut options: heed::EnvOpenOptions,
path: P,
- created_at: OffsetDateTime,
- updated_at: OffsetDateTime,
+ created_at: time::OffsetDateTime,
+ updated_at: time::OffsetDateTime,
) -> Result {
use db_name::*;
@@ -256,22 +255,22 @@ impl Index {
}
pub fn new>(options: heed::EnvOpenOptions, path: P) -> Result {
- let now = OffsetDateTime::now_utc();
+ let now = time::OffsetDateTime::now_utc();
Self::new_with_creation_dates(options, path, now, now)
}
fn set_creation_dates(
env: &heed::Env,
main: Database,
- created_at: OffsetDateTime,
- updated_at: OffsetDateTime,
+ created_at: time::OffsetDateTime,
+ updated_at: time::OffsetDateTime,
) -> heed::Result<()> {
let mut txn = env.write_txn()?;
// The db was just created, we update its metadata with the relevant information.
let main = main.remap_types::>();
if main.get(&txn, main_key::CREATED_AT_KEY)?.is_none() {
- main.put(&mut txn, main_key::UPDATED_AT_KEY, &updated_at)?;
- main.put(&mut txn, main_key::CREATED_AT_KEY, &created_at)?;
+ main.put(&mut txn, main_key::UPDATED_AT_KEY, &OffsetDateTime(updated_at))?;
+ main.put(&mut txn, main_key::CREATED_AT_KEY, &OffsetDateTime(created_at))?;
txn.commit()?;
}
Ok(())
@@ -371,7 +370,7 @@ impl Index {
wtxn: &mut RwTxn<'_>,
primary_key: &str,
) -> heed::Result<()> {
- self.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
+ self.set_updated_at(wtxn, &time::OffsetDateTime::now_utc())?;
self.main.remap_types::().put(wtxn, main_key::PRIMARY_KEY_KEY, primary_key)
}
@@ -1323,7 +1322,7 @@ impl Index {
}
/// Returns the index creation time.
- pub fn created_at(&self, rtxn: &RoTxn<'_>) -> Result {
+ pub fn created_at(&self, rtxn: &RoTxn<'_>) -> Result {
Ok(self
.main
.remap_types::>()
@@ -1331,11 +1330,12 @@ impl Index {
.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::MAIN,
key: Some(main_key::CREATED_AT_KEY),
- })?)
+ })?
+ .0)
}
/// Returns the index last updated time.
- pub fn updated_at(&self, rtxn: &RoTxn<'_>) -> Result {
+ pub fn updated_at(&self, rtxn: &RoTxn<'_>) -> Result {
Ok(self
.main
.remap_types::>()
@@ -1343,18 +1343,19 @@ impl Index {
.ok_or(InternalError::DatabaseMissingEntry {
db_name: db_name::MAIN,
key: Some(main_key::UPDATED_AT_KEY),
- })?)
+ })?
+ .0)
}
pub(crate) fn set_updated_at(
&self,
wtxn: &mut RwTxn<'_>,
- time: &OffsetDateTime,
+ time: &time::OffsetDateTime,
) -> heed::Result<()> {
self.main.remap_types::>().put(
wtxn,
main_key::UPDATED_AT_KEY,
- time,
+ &OffsetDateTime(*time),
)
}
@@ -1681,6 +1682,10 @@ pub struct IndexEmbeddingConfig {
pub user_provided: RoaringBitmap,
}
+#[derive(Serialize, Deserialize)]
+#[serde(transparent)]
+struct OffsetDateTime(#[serde(with = "time::serde::rfc3339")] time::OffsetDateTime);
+
#[cfg(test)]
pub(crate) mod tests {
use std::collections::HashSet;
diff --git a/milli/src/localized_attributes_rules.rs b/milli/src/localized_attributes_rules.rs
index 739d03043..3c421ca6b 100644
--- a/milli/src/localized_attributes_rules.rs
+++ b/milli/src/localized_attributes_rules.rs
@@ -90,6 +90,21 @@ impl LocalizedFieldIds {
pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> {
self.field_id_to_locales.get(&fields_id).map(Vec::as_slice)
}
+
+ pub fn all_locales(&self) -> Vec {
+ let mut locales = Vec::new();
+ for field_locales in self.field_id_to_locales.values() {
+ if !field_locales.is_empty() {
+ locales.extend(field_locales);
+ } else {
+ // If a field has no locales, we consider it as not localized
+ return Vec::new();
+ }
+ }
+ locales.sort();
+ locales.dedup();
+ locales
+ }
}
#[cfg(test)]
diff --git a/milli/src/search/facet/search.rs b/milli/src/search/facet/search.rs
index 39fb7374a..cdba7ee16 100644
--- a/milli/src/search/facet/search.rs
+++ b/milli/src/search/facet/search.rs
@@ -339,10 +339,18 @@ impl ValuesCollection {
fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
let options = NormalizerOption { lossy: true, ..Default::default() };
let mut detection = StrDetection::new(facet_string, locales);
+
+ // Detect the language of the facet string only if several locales are explicitly provided.
+ let language = match locales {
+ Some(&[language]) => Some(language),
+ Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(),
+ _ => None,
+ };
+
let token = Token {
lemma: std::borrow::Cow::Borrowed(facet_string),
script: detection.script(),
- language: detection.language(),
+ language,
..Default::default()
};
diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs
index 0f5eb23e1..3057066d2 100644
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -360,6 +360,7 @@ mod test {
use super::*;
#[cfg(feature = "japanese")]
+ #[cfg(not(feature = "chinese-pinyin"))]
#[test]
fn test_kanji_language_detection() {
use crate::index::tests::TempIndex;
diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs
index d33058af1..d1d9d6d9a 100644
--- a/milli/src/search/new/db_cache.rs
+++ b/milli/src/search/new/db_cache.rs
@@ -110,18 +110,18 @@ impl<'ctx> DatabaseCache<'ctx> {
.map_err(Into::into)
}
- fn get_value_from_keys<'v, K1, KC, DC>(
+ fn get_value_from_keys<'v, K1, KC>(
txn: &'ctx RoTxn<'_>,
cache_key: K1,
db_keys: &'v [KC::EItem],
cache: &mut FxHashMap>>,
db: Database,
+ universe: Option<&RoaringBitmap>,
merger: MergeFn,
- ) -> Result