Merge a01bc7b454 into 94fb55bb6f

Merge #5049
5049: Fix the path used in the flaky tests CI r=irevoire a=Kerollmops This PR fixes [the flaky tests CI](https://github.com/meilisearch/meilisearch/actions/runs/11741717787) path used. Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-11-29 16:45:30 +08:00 · 2024-11-13 12:21:41 +01:00 · 2024-11-13 10:26:50 +00:00 · 2024-11-13 10:34:54 +01:00 · 2024-11-13 10:33:59 +01:00 · 2024-11-13 09:52:10 +01:00
17 changed files with 665 additions and 170 deletions
--- a/.github/workflows/flaky-tests.yml
+++ b/.github/workflows/flaky-tests.yml
@ -21,10 +21,10 @@ jobs:
    - name: Install cargo-flaky
      run: cargo install cargo-flaky
    - name: Run cargo flaky in the dumps
-      run: cd dump; cargo flaky -i 100 --release
+      run: cd crates/dump; cargo flaky -i 100 --release
    - name: Run cargo flaky in the index-scheduler
-      run: cd index-scheduler; cargo flaky -i 100 --release
+      run: cd crates/index-scheduler; cargo flaky -i 100 --release
    - name: Run cargo flaky in the auth
-      run: cd meilisearch-auth; cargo flaky -i 100 --release
+      run: cd crates/meilisearch-auth; cargo flaky -i 100 --release
    - name: Run cargo flaky in meilisearch
-      run: cd meilisearch; cargo flaky -i 100 --release
+      run: cd crates/meilisearch; cargo flaky -i 100 --release
--- a/crates/meilisearch/src/metrics.rs
+++ b/crates/meilisearch/src/metrics.rs
@ -49,4 +49,18 @@ lazy_static! {
    /// Integer gauge registered under the name `meilisearch_is_indexing`.
    ///
    /// Registration failure panics with "Can't create a metric".
    pub static ref MEILISEARCH_IS_INDEXING: IntGauge =
        register_int_gauge!(opts!("meilisearch_is_indexing", "Meilisearch Is Indexing"))
            .expect("Can't create a metric");
    /// Integer gauge registered under the name `meilisearch_search_queue_size`.
    ///
    /// The metrics route sets it to the search queue's capacity on each scrape.
    /// Registration failure panics with "Can't create a metric".
    pub static ref MEILISEARCH_SEARCH_QUEUE_SIZE: IntGauge = register_int_gauge!(opts!(
        "meilisearch_search_queue_size",
        "Meilisearch Search Queue Size"
    ))
    .expect("Can't create a metric");
    /// Integer gauge registered under the name `meilisearch_searches_running`.
    ///
    /// The metrics route sets it to the number of searches the search queue reports as running.
    /// Registration failure panics with "Can't create a metric".
    pub static ref MEILISEARCH_SEARCHES_RUNNING: IntGauge =
        register_int_gauge!(opts!("meilisearch_searches_running", "Meilisearch Searches Running"))
            .expect("Can't create a metric");
    /// Integer gauge registered under the name `meilisearch_searches_waiting_to_be_processed`.
    ///
    /// The metrics route sets it to the number of searches the search queue reports as
    /// waiting, so the help text describes waiting searches rather than processed ones.
    /// Registration failure panics with "Can't create a metric".
    pub static ref MEILISEARCH_SEARCHES_WAITING_TO_BE_PROCESSED: IntGauge =
        register_int_gauge!(opts!(
            "meilisearch_searches_waiting_to_be_processed",
            "Meilisearch Searches Waiting To Be Processed"
        ))
        .expect("Can't create a metric");
 }
--- a/crates/meilisearch/src/routes/metrics.rs
+++ b/crates/meilisearch/src/routes/metrics.rs
@ -10,6 +10,7 @@ use prometheus::{Encoder, TextEncoder};
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::routes::create_all_stats;
 use crate::search_queue::SearchQueue;
 pub fn configure(config: &mut web::ServiceConfig) {
    config.service(web::resource("").route(web::get().to(get_metrics)));
@ -18,6 +19,7 @@ pub fn configure(config: &mut web::ServiceConfig) {
 pub async fn get_metrics(
    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
    auth_controller: Data<AuthController>,
    search_queue: web::Data<SearchQueue>,
 ) -> Result<HttpResponse, ResponseError> {
    index_scheduler.features().check_metrics()?;
    let auth_filters = index_scheduler.filters();
@ -35,6 +37,11 @@ pub async fn get_metrics(
    crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
    crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);
    crate::metrics::MEILISEARCH_SEARCH_QUEUE_SIZE.set(search_queue.capacity() as i64);
    crate::metrics::MEILISEARCH_SEARCHES_RUNNING.set(search_queue.searches_running() as i64);
    crate::metrics::MEILISEARCH_SEARCHES_WAITING_TO_BE_PROCESSED
        .set(search_queue.searches_waiting() as i64);
    for (index, value) in response.indexes.iter() {
        crate::metrics::MEILISEARCH_INDEX_DOCS_COUNT
            .with_label_values(&[index])
--- a/crates/meilisearch/src/search_queue.rs
+++ b/crates/meilisearch/src/search_queue.rs
@ -18,6 +18,8 @@
 //!                         And should drop the Permit only once you have freed all the RAM consumed by the method.
 use std::num::NonZeroUsize;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::Arc;
 use std::time::Duration;
 use rand::rngs::StdRng;
@ -33,6 +35,8 @@ pub struct SearchQueue {
    /// If we have waited longer than this to get a permit, we should abort the search request entirely.
    /// The client probably already closed the connection, but we have no way to find out.
    time_to_abort: Duration,
    searches_running: Arc<AtomicUsize>,
    searches_waiting_to_be_processed: Arc<AtomicUsize>,
 }
 /// You should only run search requests while holding this permit.
@ -68,14 +72,41 @@ impl SearchQueue {
        // so let's not allocate any RAM and keep a capacity of 1.
        let (sender, receiver) = mpsc::channel(1);
-        tokio::task::spawn(Self::run(capacity, paralellism, receiver));
+        let instance = Self {
-        Self { sender, capacity, time_to_abort: Duration::from_secs(60) }
+            sender,
            capacity,
            time_to_abort: Duration::from_secs(60),
            searches_running: Default::default(),
            searches_waiting_to_be_processed: Default::default(),
        };
        tokio::task::spawn(Self::run(
            capacity,
            paralellism,
            receiver,
            Arc::clone(&instance.searches_running),
            Arc::clone(&instance.searches_waiting_to_be_processed),
        ));
        instance
    }
    /// Consumes the queue and returns it with `time_to_abort` replaced by the given duration.
    ///
    /// Every other field is carried over unchanged.
    pub fn with_time_to_abort(mut self, time_to_abort: Duration) -> Self {
        self.time_to_abort = time_to_abort;
        self
    }
    /// Returns the capacity this queue was configured with.
    pub fn capacity(&self) -> usize {
        self.capacity
    }
    /// Returns the number of running searches, as last published by the scheduling loop.
    ///
    /// The counter is read with a relaxed atomic load, so the value is a snapshot
    /// intended for reporting rather than for synchronization.
    pub fn searches_running(&self) -> usize {
        self.searches_running.load(Ordering::Relaxed)
    }
    /// Returns the number of searches waiting in the queue, as last published by the
    /// scheduling loop.
    ///
    /// The counter is read with a relaxed atomic load, so the value is a snapshot
    /// intended for reporting rather than for synchronization.
    pub fn searches_waiting(&self) -> usize {
        self.searches_waiting_to_be_processed.load(Ordering::Relaxed)
    }
    /// This function is the main loop: it is in charge of scheduling which search request should execute first and
    /// how many should execute at the same time.
    ///
@ -84,6 +115,8 @@ impl SearchQueue {
        capacity: usize,
        parallelism: NonZeroUsize,
        mut receive_new_searches: mpsc::Receiver<oneshot::Sender<Permit>>,
        metric_searches_running: Arc<AtomicUsize>,
        metric_searches_waiting: Arc<AtomicUsize>,
    ) {
        let mut queue: Vec<oneshot::Sender<Permit>> = Default::default();
        let mut rng: StdRng = StdRng::from_entropy();
@ -133,6 +166,9 @@ impl SearchQueue {
                    queue.push(search_request);
                },
            }
            metric_searches_running.store(searches_running, Ordering::Relaxed);
            metric_searches_waiting.store(queue.len(), Ordering::Relaxed);
        }
    }
--- a/crates/meilisearch/tests/common/mod.rs
+++ b/crates/meilisearch/tests/common/mod.rs
@ -389,3 +389,25 @@ pub static VECTOR_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
      },
    ])
 });
 /// Returns the shared `SHARED_TEST_SET` index, creating and filling it on first use.
 ///
 /// The index is created on the shared server and populated with the documents from
 /// `../assets/test_set.json`. The result is stored in a static `OnceCell`, so callers
 /// receive a reference to the same index.
 ///
 /// # Panics
 ///
 /// Panics if the document upload is not answered with status code 202.
 pub async fn shared_index_with_test_set() -> &'static Index<'static, Shared> {
    static INDEX: OnceCell<Index<'static, Shared>> = OnceCell::const_new();

    let build_index = || async {
        let server = Server::new_shared();
        let index = server._index("SHARED_TEST_SET").to_shared();

        let documents_url =
            format!("/indexes/{}/documents", urlencoding::encode(index.uid.as_ref()));
        let payload = include_str!("../assets/test_set.json");
        let headers = vec![("content-type", "application/json")];

        let (task, status) = index.service.post_str(documents_url, payload, headers).await;
        assert_eq!(status, 202);

        // Wait for the task returned by the upload before handing the index out.
        index.wait_task(task.uid()).await;
        index
    };

    INDEX.get_or_init(build_index).await
 }
--- a/crates/meilisearch/tests/documents/add_documents.rs
+++ b/crates/meilisearch/tests/documents/add_documents.rs
@ -1335,7 +1335,6 @@ async fn error_add_documents_missing_document_id() {
 }
 #[actix_rt::test]
 #[should_panic]
 async fn error_document_field_limit_reached_in_one_document() {
    let server = Server::new().await;
    let index = server.index("test");
@ -1352,7 +1351,7 @@ async fn error_document_field_limit_reached_in_one_document() {
    let documents = json!([big_object]);
    let (response, code) = index.update_documents(documents, Some("id")).await;
-    snapshot!(code, @"500 Internal Server Error");
+    snapshot!(code, @"202 Accepted");
    let response = index.wait_task(response.uid()).await;
    snapshot!(code, @"202 Accepted");
@ -1360,16 +1359,21 @@ async fn error_document_field_limit_reached_in_one_document() {
    snapshot!(response,
        @r###"
    {
-      "uid": 1,
+      "uid": "[uid]",
      "indexUid": "test",
-      "status": "succeeded",
+      "status": "failed",
      "type": "documentAdditionOrUpdate",
      "canceledBy": null,
      "details": {
        "receivedDocuments": 1,
-        "indexedDocuments": 1
+        "indexedDocuments": 0
      },
      "error": {
        "message": "A document cannot contain more than 65,535 fields.",
        "code": "max_fields_limit_exceeded",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#max_fields_limit_exceeded"
      },
      "error": null,
      "duration": "[duration]",
      "enqueuedAt": "[date]",
      "startedAt": "[date]",
--- a/crates/meilisearch/tests/documents/get_documents.rs
+++ b/crates/meilisearch/tests/documents/get_documents.rs
@ -4,24 +4,27 @@ use meili_snap::*;
 use urlencoding::encode as urlencode;
 use crate::common::encoder::Encoder;
-use crate::common::{GetAllDocumentsOptions, Server, Value};
+use crate::common::{
    shared_does_not_exists_index, shared_empty_index, shared_index_with_test_set,
    GetAllDocumentsOptions, Server, Value,
 };
 use crate::json;
 // TODO: partial test since we are testing error, and error is not yet fully implemented in
 // transplant
 #[actix_rt::test]
 async fn get_unexisting_index_single_document() {
-    let server = Server::new().await;
+    let (_response, code) = shared_does_not_exists_index().await.get_document(1, None).await;
    let (_response, code) = server.index("test").get_document(1, None).await;
    assert_eq!(code, 404);
 }
 #[actix_rt::test]
 async fn error_get_unexisting_document() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
-    let index = server.index("test");
+    let index = server.unique_index();
-    index.create(None).await;
+    let (task, _code) = index.create(None).await;
-    index.wait_task(0).await;
+    index.wait_task(task.uid()).await.succeeded();
    let (response, code) = index.get_document(1, None).await;
    let expected_response = json!({
@ -37,18 +40,19 @@ async fn error_get_unexisting_document() {
 #[actix_rt::test]
 async fn get_document() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
-    let index = server.index("test");
+    let index = server.unique_index();
-    index.create(None).await;
+    let (task, _code) = index.create(None).await;
    index.wait_task(task.uid()).await.succeeded();
    let documents = json!([
        {
            "id": 0,
            "nested": { "content": "foobar" },
        }
    ]);
-    let (_, code) = index.add_documents(documents, None).await;
+    let (task, code) = index.add_documents(documents, None).await;
    assert_eq!(code, 202);
-    index.wait_task(1).await;
+    index.wait_task(task.uid()).await.succeeded();
    let (response, code) = index.get_document(0, None).await;
    assert_eq!(code, 200);
    assert_eq!(
@ -81,12 +85,11 @@ async fn get_document() {
 #[actix_rt::test]
 async fn error_get_unexisting_index_all_documents() {
-    let server = Server::new().await;
+    let index = shared_does_not_exists_index().await;
-    let (response, code) =
+    let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
        server.index("test").get_all_documents(GetAllDocumentsOptions::default()).await;
    let expected_response = json!({
-        "message": "Index `test` not found.",
+        "message": "Index `DOES_NOT_EXISTS` not found.",
        "code": "index_not_found",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#index_not_found"
@ -98,12 +101,7 @@ async fn error_get_unexisting_index_all_documents() {
 #[actix_rt::test]
 async fn get_no_document() {
-    let server = Server::new().await;
+    let index = shared_empty_index().await;
    let index = server.index("test");
    let (_, code) = index.create(None).await;
    assert_eq!(code, 202);
    index.wait_task(0).await;
    let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
    assert_eq!(code, 200);
@ -112,14 +110,12 @@ async fn get_no_document() {
 #[actix_rt::test]
 async fn get_all_documents_no_options() {
-    let server = Server::new().await;
+    let index = shared_index_with_test_set().await;
    let index = server.index("test");
    index.load_test_set().await;
    let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
    assert_eq!(code, 200);
-    let arr = response["results"].as_array().unwrap();
+    let results = response["results"].as_array().unwrap();
-    assert_eq!(arr.len(), 20);
+    assert_eq!(results.len(), 20);
    let first = json!({
        "id":0,
        "isActive":false,
@ -138,19 +134,16 @@ async fn get_all_documents_no_options() {
        "longitude":-145.725388,
        "tags":["bug"
            ,"bug"]});
-    assert_eq!(first, arr[0]);
+    assert_eq!(first, results[0]);
 }
 #[actix_rt::test]
 async fn get_all_documents_no_options_with_response_compression() {
-    let server = Server::new().await;
+    let index = shared_index_with_test_set().await;
    let index_uid = "test";
    let index = server.index(index_uid);
    index.load_test_set().await;
-    let app = server.init_web_app().await;
+    let app = Server::new_shared().init_web_app().await;
    let req = test::TestRequest::get()
-        .uri(&format!("/indexes/{}/documents?", urlencode(index_uid)))
+        .uri(&format!("/indexes/{}/documents?", urlencode(&index.uid)))
        .insert_header((ACCEPT_ENCODING, "gzip"))
        .to_request();
@ -169,9 +162,7 @@ async fn get_all_documents_no_options_with_response_compression() {
 #[actix_rt::test]
 async fn test_get_all_documents_limit() {
-    let server = Server::new().await;
+    let index = shared_index_with_test_set().await;
    let index = server.index("test");
    index.load_test_set().await;
    let (response, code) = index
        .get_all_documents(GetAllDocumentsOptions { limit: Some(5), ..Default::default() })
@ -186,9 +177,7 @@ async fn test_get_all_documents_limit() {
 #[actix_rt::test]
 async fn test_get_all_documents_offset() {
-    let server = Server::new().await;
+    let index = shared_index_with_test_set().await;
    let index = server.index("test");
    index.load_test_set().await;
    let (response, code) = index
        .get_all_documents(GetAllDocumentsOptions { offset: Some(5), ..Default::default() })
@ -203,9 +192,7 @@ async fn test_get_all_documents_offset() {
 #[actix_rt::test]
 async fn test_get_all_documents_attributes_to_retrieve() {
-    let server = Server::new().await;
+    let index = shared_index_with_test_set().await;
    let index = server.index("test");
    index.load_test_set().await;
    let (response, code) = index
        .get_all_documents(GetAllDocumentsOptions {
@ -286,9 +273,11 @@ async fn test_get_all_documents_attributes_to_retrieve() {
 #[actix_rt::test]
 async fn get_document_s_nested_attributes_to_retrieve() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
-    let index = server.index("test");
+    let index = server.unique_index();
-    index.create(None).await;
+    let (task, _code) = index.create(None).await;
    index.wait_task(task.uid()).await.succeeded();
    let documents = json!([
        {
            "id": 0,
@ -302,9 +291,9 @@ async fn get_document_s_nested_attributes_to_retrieve() {
            },
        },
    ]);
-    let (_, code) = index.add_documents(documents, None).await;
+    let (task, code) = index.add_documents(documents, None).await;
    assert_eq!(code, 202);
-    index.wait_task(1).await;
+    index.wait_task(task.uid()).await.succeeded();
    let (response, code) = index.get_document(0, Some(json!({ "fields": ["content"] }))).await;
    assert_eq!(code, 200);
@ -343,10 +332,10 @@ async fn get_document_s_nested_attributes_to_retrieve() {
 #[actix_rt::test]
 async fn get_documents_displayed_attributes_is_ignored() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
-    let index = server.index("test");
+    let index = server.unique_index();
    index.update_settings(json!({"displayedAttributes": ["gender"]})).await;
    index.load_test_set().await;
    index.update_settings(json!({"displayedAttributes": ["gender"]})).await;
    let (response, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
    assert_eq!(code, 200);
@ -366,10 +355,10 @@ async fn get_documents_displayed_attributes_is_ignored() {
 #[actix_rt::test]
 async fn get_document_by_filter() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
-    let index = server.index("doggo");
+    let index = server.unique_index();
    index.update_settings_filterable_attributes(json!(["color"])).await;
-    index
+    let (task, _code) = index
        .add_documents(
            json!([
                { "id": 0, "color": "red" },
@ -380,7 +369,7 @@ async fn get_document_by_filter() {
            Some("id"),
        )
        .await;
-    index.wait_task(1).await;
+    index.wait_task(task.uid()).await.succeeded();
    let (response, code) = index.get_document_by_filter(json!({})).await;
    let (response2, code2) = index.get_all_documents_raw("").await;
@ -552,7 +541,7 @@ async fn get_document_with_vectors() {
        }))
        .await;
    snapshot!(code, @"202 Accepted");
-    server.wait_task(response.uid()).await;
+    server.wait_task(response.uid()).await.succeeded();
    let documents = json!([
      {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
@ -560,7 +549,7 @@ async fn get_document_with_vectors() {
    ]);
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
-    index.wait_task(value.uid()).await;
+    index.wait_task(value.uid()).await.succeeded();
    // by default you shouldn't see the `_vectors` object
    let (documents, _code) = index.get_all_documents(Default::default()).await;
--- a/crates/meilisearch/tests/search/formatted.rs
+++ b/crates/meilisearch/tests/search/formatted.rs
@ -6,14 +6,14 @@ use crate::json;
 #[actix_rt::test]
 async fn formatted_contain_wildcard() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
-    let index = server.index("test");
+    let index = server.unique_index();
    index.update_settings(json!({ "displayedAttributes": ["id", "cattos"] })).await;
    let documents = NESTED_DOCUMENTS.clone();
-    index.add_documents(documents, None).await;
+    let (response, _) = index.add_documents(documents, None).await;
-    index.wait_task(1).await;
+    index.wait_task(response.uid()).await;
    index.search(json!({ "q": "pésti", "attributesToRetrieve": ["father", "mother"], "attributesToHighlight": ["father", "mother", "*"], "attributesToCrop": ["doggos"], "showMatchesPosition": true }),
        |response, code|
@ -135,12 +135,7 @@ async fn formatted_contain_wildcard() {
 #[actix_rt::test]
 async fn format_nested() {
-    let server = Server::new().await;
+    let index = shared_index_with_nested_documents().await;
    let index = server.index("test");
    let documents = NESTED_DOCUMENTS.clone();
    index.add_documents(documents, None).await;
    index.wait_task(0).await;
    index
        .search(json!({ "q": "pésti", "attributesToRetrieve": ["doggos"] }), |response, code| {
@ -340,15 +335,15 @@ async fn format_nested() {
 #[actix_rt::test]
 async fn displayedattr_2_smol() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
-    let index = server.index("test");
+    let index = server.unique_index();
    // not enough displayed for the other settings
    index.update_settings(json!({ "displayedAttributes": ["id"] })).await;
    let documents = NESTED_DOCUMENTS.clone();
-    index.add_documents(documents, None).await;
+    let (response, _) = index.add_documents(documents, None).await;
-    index.wait_task(1).await;
+    index.wait_task(response.uid()).await;
    index
        .search(json!({ "attributesToRetrieve": ["father", "id"], "attributesToHighlight": ["mother"], "attributesToCrop": ["cattos"] }),
@ -538,15 +533,15 @@ async fn displayedattr_2_smol() {
 #[cfg(feature = "default")]
 #[actix_rt::test]
 async fn test_cjk_highlight() {
-    let server = Server::new().await;
+    let server = Server::new_shared();
-    let index = server.index("test");
+    let index = server.unique_index();
    let documents = json!([
        { "id": 0, "title": "この度、クーポンで無料で頂きました。" },
        { "id": 1, "title": "大卫到了扫罗那里" },
    ]);
-    index.add_documents(documents, None).await;
+    let (response, _) = index.add_documents(documents, None).await;
-    index.wait_task(0).await;
+    index.wait_task(response.uid()).await;
    index
        .search(json!({"q": "で", "attributesToHighlight": ["title"]}), |response, code| {
--- a/crates/meilisearch/tests/search/multi.rs
+++ b/crates/meilisearch/tests/search/multi.rs
@ -4346,10 +4346,10 @@ async fn federation_vector_two_indexes() {
    let (response, code) = server
        .multi_search(json!({"federation": {}, "queries": [
-        {"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}},
+        {"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "retrieveVectors": true},
        // joyful and energetic first
-        {"indexUid": "vectors-sentiment", "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}},
+        {"indexUid": "vectors-sentiment", "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "retrieveVectors": true},
-        {"indexUid": "vectors-sentiment", "q": "dog"},
+        {"indexUid": "vectors-sentiment", "q": "dog", "retrieveVectors": true},
        ]}))
        .await;
    snapshot!(code, @"200 OK");
@ -4364,7 +4364,16 @@ async fn federation_vector_two_indexes() {
              0.8,
              0.09,
              0.8
            ],
            "sentiment": {
              "embeddings": [
                [
                  0.800000011920929,
                  0.30000001192092896
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-sentiment",
@ -4379,7 +4388,17 @@ async fn federation_vector_two_indexes() {
            "sentiment": [
              0.8,
              0.3
            ],
            "animal": {
              "embeddings": [
                [
                  0.800000011920929,
                  0.09000000357627869,
                  0.800000011920929
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-animal",
@ -4394,7 +4413,17 @@ async fn federation_vector_two_indexes() {
            "sentiment": [
              -1.0,
              0.1
            ],
            "animal": {
              "embeddings": [
                [
                  0.8500000238418579,
                  0.019999999552965164,
                  0.10000000149011612
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-animal",
@ -4410,7 +4439,16 @@ async fn federation_vector_two_indexes() {
              0.9,
              0.8,
              0.05
            ],
            "sentiment": {
              "embeddings": [
                [
                  -0.10000000149011612,
                  0.550000011920929
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-sentiment",
@ -4426,7 +4464,16 @@ async fn federation_vector_two_indexes() {
              0.85,
              0.02,
              0.1
            ],
            "sentiment": {
              "embeddings": [
                [
                  -1.0,
                  0.10000000149011612
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-sentiment",
@ -4441,7 +4488,17 @@ async fn federation_vector_two_indexes() {
            "sentiment": [
              -0.2,
              0.65
            ],
            "animal": {
              "embeddings": [
                [
                  0.800000011920929,
                  0.8999999761581421,
                  0.5
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-animal",
@ -4456,7 +4513,17 @@ async fn federation_vector_two_indexes() {
            "sentiment": [
              -0.1,
              0.55
            ],
            "animal": {
              "embeddings": [
                [
                  0.8999999761581421,
                  0.800000011920929,
                  0.05000000074505806
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-animal",
@ -4472,7 +4539,16 @@ async fn federation_vector_two_indexes() {
              0.8,
              0.9,
              0.5
            ],
            "sentiment": {
              "embeddings": [
                [
                  -0.20000000298023224,
                  0.6499999761581421
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-sentiment",
@ -4492,8 +4568,8 @@ async fn federation_vector_two_indexes() {
    // hybrid search, distinct embedder
    let (response, code) = server
        .multi_search(json!({"federation": {}, "queries": [
-          {"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true},
+          {"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true, "retrieveVectors": true},
-          {"indexUid": "vectors-sentiment", "vector": [-1, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "showRankingScore": true},
+          {"indexUid": "vectors-sentiment", "vector": [-1, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "showRankingScore": true, "retrieveVectors": true,},
        ]}))
        .await;
    snapshot!(code, @"200 OK");
@ -4507,7 +4583,17 @@ async fn federation_vector_two_indexes() {
            "sentiment": [
              0.8,
              0.3
            ],
            "animal": {
              "embeddings": [
                [
                  0.800000011920929,
                  0.09000000357627869,
                  0.800000011920929
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-animal",
@ -4523,7 +4609,17 @@ async fn federation_vector_two_indexes() {
            "sentiment": [
              -1.0,
              0.1
            ],
            "animal": {
              "embeddings": [
                [
                  0.8500000238418579,
                  0.019999999552965164,
                  0.10000000149011612
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-animal",
@ -4540,7 +4636,16 @@ async fn federation_vector_two_indexes() {
              0.85,
              0.02,
              0.1
            ],
            "sentiment": {
              "embeddings": [
                [
                  -1.0,
                  0.10000000149011612
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-sentiment",
@ -4557,7 +4662,16 @@ async fn federation_vector_two_indexes() {
              0.8,
              0.9,
              0.5
            ],
            "sentiment": {
              "embeddings": [
                [
                  -0.20000000298023224,
                  0.6499999761581421
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-sentiment",
@ -4573,7 +4687,17 @@ async fn federation_vector_two_indexes() {
            "sentiment": [
              -0.2,
              0.65
            ],
            "animal": {
              "embeddings": [
                [
                  0.800000011920929,
                  0.8999999761581421,
                  0.5
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-animal",
@ -4589,7 +4713,17 @@ async fn federation_vector_two_indexes() {
            "sentiment": [
              -0.1,
              0.55
            ],
            "animal": {
              "embeddings": [
                [
                  0.8999999761581421,
                  0.800000011920929,
                  0.05000000074505806
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-animal",
@ -4606,7 +4740,16 @@ async fn federation_vector_two_indexes() {
              0.9,
              0.8,
              0.05
            ],
            "sentiment": {
              "embeddings": [
                [
                  -0.10000000149011612,
                  0.550000011920929
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-sentiment",
@ -4623,7 +4766,16 @@ async fn federation_vector_two_indexes() {
              0.8,
              0.09,
              0.8
            ],
            "sentiment": {
              "embeddings": [
                [
                  0.800000011920929,
                  0.30000001192092896
                ]
              ],
              "regenerate": false
            }
          },
          "_federation": {
            "indexUid": "vectors-sentiment",
--- a/crates/meilisearch/tests/vector/mod.rs
+++ b/crates/meilisearch/tests/vector/mod.rs
@ -249,7 +249,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
+        "message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n  - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -278,7 +278,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
+        "message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n  - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -308,7 +308,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.regenerate`: expected a boolean, but found a string: `\"yes please\"`",
+        "message": "Bad embedder configuration in the document with id: `0`. Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -320,8 +320,7 @@ async fn user_provided_embeddings_error() {
    }
    "###);
-    let documents =
+    let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true, "regenerate": true }}});
        json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true }}});
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    let task = index.wait_task(value.uid()).await;
@ -337,7 +336,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings`: expected null or an array, but found a boolean: `true`",
+        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -349,8 +348,7 @@ async fn user_provided_embeddings_error() {
    }
    "###);
-    let documents =
+    let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true], "regenerate": true }}});
        json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true] }}});
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    let task = index.wait_task(value.uid()).await;
@ -366,7 +364,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
+        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -378,8 +376,7 @@ async fn user_provided_embeddings_error() {
    }
    "###);
-    let documents =
+    let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]], "regenerate": false }}});
        json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]] }}});
    let (value, code) = index.add_documents(documents, None).await;
    snapshot!(code, @"202 Accepted");
    let task = index.wait_task(value.uid()).await;
@ -395,7 +392,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
+        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -436,7 +433,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
+        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -464,7 +461,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
+        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -492,7 +489,7 @@ async fn user_provided_embeddings_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
+        "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
        "code": "invalid_vectors_type",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -532,7 +529,7 @@ async fn user_provided_vectors_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "While embedding documents for embedder `manual`: no vectors provided for document \"40\" and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
+        "message": "While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -561,7 +558,7 @@ async fn user_provided_vectors_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "While embedding documents for embedder `manual`: no vectors provided for document \"42\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
+        "message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -590,7 +587,7 @@ async fn user_provided_vectors_error() {
        "indexedDocuments": 0
      },
      "error": {
-        "message": "While embedding documents for embedder `manual`: no vectors provided for document \"42\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
+        "message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
        "code": "vector_embedding_error",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#vector_embedding_error"
--- a/crates/milli/src/error.rs
+++ b/crates/milli/src/error.rs
@ -122,7 +122,7 @@ and can not be more than 512 bytes.", .document_id.to_string()
    #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
    InvalidVectorsMapType { document_id: String, value: Value },
    #[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
-    InvalidVectorsEmbedderConf { document_id: String, error: deserr::errors::JsonError },
+    InvalidVectorsEmbedderConf { document_id: String, error: String },
    #[error("{0}")]
    InvalidFilter(String),
    #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
--- a/crates/milli/src/update/new/document_change.rs
+++ b/crates/milli/src/update/new/document_change.rs
@ -97,7 +97,7 @@ impl<'doc> Insertion<'doc> {
        doc_alloc: &'doc Bump,
        embedders: &'doc EmbeddingConfigs,
    ) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
-        VectorDocumentFromVersions::new(&self.new, doc_alloc, embedders)
+        VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
    }
 }
@ -169,7 +169,7 @@ impl<'doc> Update<'doc> {
        doc_alloc: &'doc Bump,
        embedders: &'doc EmbeddingConfigs,
    ) -> Result<Option<VectorDocumentFromVersions<'doc>>> {
-        VectorDocumentFromVersions::new(&self.new, doc_alloc, embedders)
+        VectorDocumentFromVersions::new(self.external_document_id, &self.new, doc_alloc, embedders)
    }
    pub fn merged_vectors<Mapper: FieldIdMapper>(
@ -181,10 +181,22 @@ impl<'doc> Update<'doc> {
        embedders: &'doc EmbeddingConfigs,
    ) -> Result<Option<MergedVectorDocument<'doc>>> {
        if self.has_deletion {
-            MergedVectorDocument::without_db(&self.new, doc_alloc, embedders)
+            MergedVectorDocument::without_db(
                self.external_document_id,
                &self.new,
                doc_alloc,
                embedders,
            )
        } else {
            MergedVectorDocument::with_db(
-                self.docid, index, rtxn, mapper, &self.new, doc_alloc, embedders,
+                self.docid,
                self.external_document_id,
                index,
                rtxn,
                mapper,
                &self.new,
                doc_alloc,
                embedders,
            )
        }
    }
--- a/crates/milli/src/update/new/extract/vectors/mod.rs
+++ b/crates/milli/src/update/new/extract/vectors/mod.rs
@ -126,7 +126,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
                                        .into_vec(&context.doc_alloc, embedder_name)
                                        .map_err(|error| UserError::InvalidVectorsEmbedderConf {
                                            document_id: update.external_document_id().to_string(),
-                                            error,
+                                            error: error.to_string(),
                                        })?,
                                );
                            } else if new_vectors.regenerate {
@ -151,6 +151,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
                                if new_rendered != old_rendered {
                                    chunks.set_autogenerated(
                                        update.docid(),
                                        update.external_document_id(),
                                        new_rendered,
                                        &unused_vectors_distribution,
                                    )?;
@ -178,6 +179,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
                            if new_rendered != old_rendered {
                                chunks.set_autogenerated(
                                    update.docid(),
                                    update.external_document_id(),
                                    new_rendered,
                                    &unused_vectors_distribution,
                                )?;
@ -210,7 +212,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
                                            document_id: insertion
                                                .external_document_id()
                                                .to_string(),
-                                            error,
+                                            error: error.to_string(),
                                        })?,
                                );
                            } else if new_vectors.regenerate {
@ -221,6 +223,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
                                )?;
                                chunks.set_autogenerated(
                                    insertion.docid(),
                                    insertion.external_document_id(),
                                    rendered,
                                    &unused_vectors_distribution,
                                )?;
@ -233,6 +236,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
                            )?;
                            chunks.set_autogenerated(
                                insertion.docid(),
                                insertion.external_document_id(),
                                rendered,
                                &unused_vectors_distribution,
                            )?;
@ -268,6 +272,7 @@ struct Chunks<'a, 'extractor> {
    user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
    threads: &'a ThreadPoolNoAbort,
    sender: &'a EmbeddingSender<'a>,
    has_manual_generation: Option<&'a str>,
 }
 impl<'a, 'extractor> Chunks<'a, 'extractor> {
@ -297,15 +302,22 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
            embedder_id,
            embedder_name,
            user_provided,
            has_manual_generation: None,
        }
    }
    pub fn set_autogenerated(
        &mut self,
        docid: DocumentId,
        external_docid: &'a str,
        rendered: &'a str,
        unused_vectors_distribution: &UnusedVectorsDistributionBump,
    ) -> Result<()> {
        let is_manual = matches!(&self.embedder, &Embedder::UserProvided(_));
        if is_manual {
            self.has_manual_generation.get_or_insert(external_docid);
        }
        if self.texts.len() < self.texts.capacity() {
            self.texts.push(rendered);
            self.ids.push(docid);
@ -322,6 +334,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
            unused_vectors_distribution,
            self.threads,
            self.sender,
            self.has_manual_generation.take(),
        )
    }
@ -339,6 +352,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
            unused_vectors_distribution,
            self.threads,
            self.sender,
            self.has_manual_generation,
        );
        // optimization: don't run bvec dtors as they only contain bumpalo allocated stuff
        std::mem::forget(self);
@ -356,7 +370,46 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
        unused_vectors_distribution: &UnusedVectorsDistributionBump,
        threads: &ThreadPoolNoAbort,
        sender: &EmbeddingSender<'a>,
        has_manual_generation: Option<&'a str>,
    ) -> Result<()> {
        if let Some(external_docid) = has_manual_generation {
            let mut msg = format!(
                r"While embedding documents for embedder `{embedder_name}`: no vectors provided for document `{}`{}",
                external_docid,
                if ids.len() > 1 {
                    format!(" and at least {} other document(s)", ids.len() - 1)
                } else {
                    "".to_string()
                }
            );
            msg += &format!("\n- Note: `{embedder_name}` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.{embedder_name}`.");
            let mut hint_count = 0;
            for (vector_misspelling, count) in possible_embedding_mistakes.vector_mistakes().take(2)
            {
                msg += &format!("\n- Hint: try replacing `{vector_misspelling}` by `_vectors` in {count} document(s).");
                hint_count += 1;
            }
            for (embedder_misspelling, count) in possible_embedding_mistakes
                .embedder_mistakes_bump(embedder_name, unused_vectors_distribution)
                .take(2)
            {
                msg += &format!("\n- Hint: try replacing `_vectors.{embedder_misspelling}` by `_vectors.{embedder_name}` in {count} document(s).");
                hint_count += 1;
            }
            if hint_count == 0 {
                msg += &format!(
                    "\n- Hint: opt-out for a document with `_vectors.{embedder_name}: null`"
                );
            }
            return Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg)));
        }
        let res = match embedder.embed_chunks_ref(texts.as_slice(), threads) {
            Ok(embeddings) => {
                for (docid, embedding) in ids.into_iter().zip(embeddings) {
--- a/crates/milli/src/update/new/indexer/de.rs
+++ b/crates/milli/src/update/new/indexer/de.rs
@ -41,6 +41,11 @@ impl<'de, 'p, 'indexer: 'de, Mapper: MutFieldIdMapper> Visitor<'de>
    where
        A: serde::de::MapAccess<'de>,
    {
        // We need to remember if we encountered a semantic error, because raw values don't like to be parsed partially
        // (trying to do so results in parsing errors).
        // So we'll exhaust all keys and values even if we encounter an error, and we'll then return any error we detected.
        let mut attribute_limit_reached = false;
        let mut document_id_extraction_error = None;
        let mut docid = None;
        while let Some(((level_name, right), (fid, fields_ids_map))) =
@ -49,19 +54,35 @@ impl<'de, 'p, 'indexer: 'de, Mapper: MutFieldIdMapper> Visitor<'de>
                visitor: MutFieldIdMapVisitor(self.fields_ids_map),
            })?
        {
            let Some(_fid) = fid else {
                return Ok(Err(crate::UserError::AttributeLimitReached));
            };
            self.fields_ids_map = fields_ids_map;
            let value: &'de RawValue = map.next_value()?;
            if attribute_limit_reached || document_id_extraction_error.is_some() {
                continue;
            }
            let Some(_fid) = fid else {
                attribute_limit_reached = true;
                continue;
            };
            match match_component(level_name, right, value, self.indexer, &mut docid) {
                ControlFlow::Continue(()) => continue,
                ControlFlow::Break(Err(err)) => return Err(serde::de::Error::custom(err)),
-                ControlFlow::Break(Ok(err)) => return Ok(Ok(Err(err))),
+                ControlFlow::Break(Ok(err)) => {
                    document_id_extraction_error = Some(err);
                    continue;
                }
            }
        }
        // return previously detected errors
        if attribute_limit_reached {
            return Ok(Err(UserError::AttributeLimitReached));
        }
        if let Some(document_id_extraction_error) = document_id_extraction_error {
            return Ok(Ok(Err(document_id_extraction_error)));
        }
        Ok(Ok(match docid {
            Some(docid) => Ok(docid),
--- a/crates/milli/src/update/new/vector_document.rs
+++ b/crates/milli/src/update/new/vector_document.rs
@ -12,7 +12,7 @@ use super::indexer::de::DeserrRawValue;
 use crate::documents::FieldIdMapper;
 use crate::index::IndexEmbeddingConfig;
 use crate::vector::parsed_vectors::{
-    RawVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
+    RawVectors, RawVectorsError, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
 };
 use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfigs};
 use crate::{DocumentId, Index, InternalError, Result, UserError};
@ -143,7 +143,14 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> {
                Ok((&*config_name, entry))
            })
            .chain(self.vectors_field.iter().flat_map(|map| map.iter()).map(|(name, value)| {
-                Ok((name, entry_from_raw_value(value, false).map_err(InternalError::SerdeJson)?))
+                Ok((
                    name,
                    entry_from_raw_value(value, false).map_err(|_| {
                        InternalError::Serialization(crate::SerializationError::Decoding {
                            db_name: Some(crate::index::db_name::VECTOR_ARROY),
                        })
                    })?,
                ))
            }))
    }
@ -155,20 +162,38 @@ impl<'t> VectorDocument<'t> for VectorDocumentFromDb<'t> {
                Some(self.entry_from_db(embedder_id, config)?)
            }
            None => match self.vectors_field.as_ref().and_then(|obkv| obkv.get(key)) {
-                Some(embedding_from_doc) => Some(
+                Some(embedding_from_doc) => {
-                    entry_from_raw_value(embedding_from_doc, false)
+                    Some(entry_from_raw_value(embedding_from_doc, false).map_err(|_| {
-                        .map_err(InternalError::SerdeJson)?,
+                        InternalError::Serialization(crate::SerializationError::Decoding {
-                ),
+                            db_name: Some(crate::index::db_name::VECTOR_ARROY),
                        })
                    })?)
                }
                None => None,
            },
        })
    }
 }
 /// Parses the raw vectors entry of one embedder and reports failures as a user error.
 ///
 /// This delegates to `entry_from_raw_value(value, has_configured_embedder)`. On failure,
 /// the parsing error is rendered with `error.msg(embedder_name)` and wrapped in
 /// `UserError::InvalidVectorsEmbedderConf`, together with `external_docid`, so the
 /// resulting message names both the document and the embedder.
 ///
 /// - `external_docid`: document id copied into the error's `document_id` field.
 /// - `embedder_name`: embedder name interpolated into the rendered error message.
 /// - `value`: the raw JSON value to parse.
 /// - `has_configured_embedder`: forwarded unchanged to `entry_from_raw_value`.
 fn entry_from_raw_value_user<'doc>(
    external_docid: &str,
    embedder_name: &str,
    value: &'doc RawValue,
    has_configured_embedder: bool,
 ) -> Result<VectorEntry<'doc>> {
    entry_from_raw_value(value, has_configured_embedder).map_err(|error| {
        UserError::InvalidVectorsEmbedderConf {
            document_id: external_docid.to_string(),
            error: error.msg(embedder_name),
        }
        // convert the `UserError` into the error type of the returned `Result`
        .into()
    })
 }
 fn entry_from_raw_value(
    value: &RawValue,
    has_configured_embedder: bool,
-) -> std::result::Result<VectorEntry<'_>, serde_json::Error> {
+) -> std::result::Result<VectorEntry<'_>, RawVectorsError> {
    let value: RawVectors = RawVectors::from_raw_value(value)?;
    Ok(match value {
@ -194,12 +219,14 @@ fn entry_from_raw_value(
 }
 /// Vectors of a document, read from the `_vectors` field of its incoming versions.
 pub struct VectorDocumentFromVersions<'doc> {
    /// Document id, passed to `entry_from_raw_value_user` so that parsing errors
    /// can name the offending document.
    external_document_id: &'doc str,
    /// Raw map built from the document's vectors field; iterated as
    /// (embedder name, raw value) pairs and looked up by key.
    vectors: RawMap<'doc>,
    /// Embedding configurations; queried with `contains(name)` when parsing an entry.
    embedders: &'doc EmbeddingConfigs,
 }
 impl<'doc> VectorDocumentFromVersions<'doc> {
    pub fn new(
        external_document_id: &'doc str,
        versions: &Versions<'doc>,
        bump: &'doc Bump,
        embedders: &'doc EmbeddingConfigs,
@ -208,7 +235,7 @@ impl<'doc> VectorDocumentFromVersions<'doc> {
        if let Some(vectors_field) = document.vectors_field()? {
            let vectors =
                RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?;
-            Ok(Some(Self { vectors, embedders }))
+            Ok(Some(Self { external_document_id, vectors, embedders }))
        } else {
            Ok(None)
        }
@ -218,16 +245,24 @@ impl<'doc> VectorDocumentFromVersions<'doc> {
 impl<'doc> VectorDocument<'doc> for VectorDocumentFromVersions<'doc> {
    fn iter_vectors(&self) -> impl Iterator<Item = Result<(&'doc str, VectorEntry<'doc>)>> {
        self.vectors.iter().map(|(embedder, vectors)| {
-            let vectors = entry_from_raw_value(vectors, self.embedders.contains(embedder))
+            let vectors = entry_from_raw_value_user(
-                .map_err(UserError::SerdeJson)?;
+                self.external_document_id,
                embedder,
                vectors,
                self.embedders.contains(embedder),
            )?;
            Ok((embedder, vectors))
        })
    }
    fn vectors_for_key(&self, key: &str) -> Result<Option<VectorEntry<'doc>>> {
        let Some(vectors) = self.vectors.get(key) else { return Ok(None) };
-        let vectors = entry_from_raw_value(vectors, self.embedders.contains(key))
+        let vectors = entry_from_raw_value_user(
-            .map_err(UserError::SerdeJson)?;
+            self.external_document_id,
            key,
            vectors,
            self.embedders.contains(key),
        )?;
        Ok(Some(vectors))
    }
 }
@ -238,8 +273,10 @@ pub struct MergedVectorDocument<'doc> {
 }
 impl<'doc> MergedVectorDocument<'doc> {
    #[allow(clippy::too_many_arguments)]
    pub fn with_db<Mapper: FieldIdMapper>(
        docid: DocumentId,
        external_document_id: &'doc str,
        index: &'doc Index,
        rtxn: &'doc RoTxn,
        db_fields_ids_map: &'doc Mapper,
@ -248,16 +285,20 @@ impl<'doc> MergedVectorDocument<'doc> {
        embedders: &'doc EmbeddingConfigs,
    ) -> Result<Option<Self>> {
        let db = VectorDocumentFromDb::new(docid, index, rtxn, db_fields_ids_map, doc_alloc)?;
-        let new_doc = VectorDocumentFromVersions::new(versions, doc_alloc, embedders)?;
+        let new_doc =
            VectorDocumentFromVersions::new(&external_document_id, versions, doc_alloc, embedders)?;
        Ok(if db.is_none() && new_doc.is_none() { None } else { Some(Self { new_doc, db }) })
    }
    pub fn without_db(
        external_document_id: &'doc str,
        versions: &Versions<'doc>,
        doc_alloc: &'doc Bump,
        embedders: &'doc EmbeddingConfigs,
    ) -> Result<Option<Self>> {
-        let Some(new_doc) = VectorDocumentFromVersions::new(versions, doc_alloc, embedders)? else {
+        let Some(new_doc) =
            VectorDocumentFromVersions::new(external_document_id, versions, doc_alloc, embedders)?
        else {
            return Ok(None);
        };
        Ok(Some(Self { new_doc: Some(new_doc), db: None }))
--- a/crates/milli/src/vector/mod.rs
+++ b/crates/milli/src/vector/mod.rs
@ -648,7 +648,7 @@ impl Embedder {
            Embedder::HuggingFace(embedder) => embedder.chunk_count_hint(),
            Embedder::OpenAi(embedder) => embedder.chunk_count_hint(),
            Embedder::Ollama(embedder) => embedder.chunk_count_hint(),
-            Embedder::UserProvided(_) => 1,
+            Embedder::UserProvided(_) => 100,
            Embedder::Rest(embedder) => embedder.chunk_count_hint(),
        }
    }
--- a/crates/milli/src/vector/parsed_vectors.rs
+++ b/crates/milli/src/vector/parsed_vectors.rs
@ -19,10 +19,54 @@ pub enum RawVectors<'doc> {
    ImplicitlyUserProvided(#[serde(borrow)] Option<&'doc RawValue>),
 }
 /// Error produced while reading the raw value stored under `_vectors.<embedder>`.
 ///
 /// The variants do not carry the embedder name: `RawVectorsError::msg` receives it as a
 /// parameter and renders the final text.
 pub enum RawVectorsError {
    /// An element of the array form could not be read. `index` is the number of
    /// elements that were read successfully before the failure.
    DeserializeSeq { index: usize, error: String },
    /// Rendered by `msg` as "Could not parse a field at `._vectors.<embedder>`".
    DeserializeKey { error: String },
    /// The value of the `regenerate` field could not be read as a boolean.
    DeserializeRegenerate { error: String },
    /// Rendered by `msg` as "Could not parse `._vectors.<embedder>.embeddings`".
    DeserializeEmbeddings { error: String },
    /// Rendered by `msg` as "Unexpected field `._vectors.<embedder>.<field>`", with a note
    /// that the allowed fields are `regenerate` and `embeddings`.
    UnknownField { field: String },
    /// Rendered by `msg` as "Missing field `._vectors.<embedder>.regenerate`".
    MissingRegenerate,
    /// Rendered by `msg` as a message stating that an array of floats, an array of arrays
    /// of floats, or an object with at least `regenerate` was expected, followed by the
    /// `kind` and `value` that were found instead.
    WrongKind { kind: &'static str, value: String },
    /// A `serde_json` error reported by the deserializer itself.
    Parsing(serde_json::Error),
 }
 impl RawVectorsError {
    /// Renders this error as a human-readable message.
    ///
    /// `embedder_name` is interpolated after `._vectors.` in every variant, so the message
    /// points at the location of the problem inside the document. Consumes `self` and
    /// returns an owned `String`.
    pub fn msg(self, embedder_name: &str) -> String {
        match self {
            RawVectorsError::DeserializeSeq { index, error } => format!(
                "Could not parse `._vectors.{embedder_name}[{index}]`: {error}"
            ),
            RawVectorsError::DeserializeKey { error } => format!(
                "Could not parse a field at `._vectors.{embedder_name}`: {error}"
            ),
            RawVectorsError::DeserializeRegenerate { error } => format!(
                "Could not parse `._vectors.{embedder_name}.regenerate`: {error}"
            ),
            RawVectorsError::DeserializeEmbeddings { error } => format!(
                "Could not parse `._vectors.{embedder_name}.embeddings`: {error}"
            ),
            // In the two multi-line literals below, the trailing `\` skips the line break and
            // the leading whitespace of the next source line, so the note is always rendered
            // as "\n  - note: ..." regardless of how the continuation line is indented.
            RawVectorsError::UnknownField { field } => format!(
                "Unexpected field `._vectors.{embedder_name}.{field}`\n  \
                  - note: the allowed fields are `regenerate` and `embeddings`"
            ),
            RawVectorsError::MissingRegenerate => format!(
                "Missing field `._vectors.{embedder_name}.regenerate`\n  \
                - note: `._vectors.{embedder_name}` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`"
            ),
            RawVectorsError::WrongKind { kind, value } => format!(
                "Expected `._vectors.{embedder_name}` to be an array of floats, an array of arrays of floats, or an object with at least the field `regenerate`, but got the {kind} `{value}`"
            ),
            RawVectorsError::Parsing(error) => format!(
                "Could not parse `._vectors.{embedder_name}`: {error}"
            ),
        }
    }
 }
 impl<'doc> RawVectors<'doc> {
-    pub fn from_raw_value(raw: &'doc RawValue) -> Result<Self, serde_json::Error> {
+    pub fn from_raw_value(raw: &'doc RawValue) -> Result<Self, RawVectorsError> {
        use serde::de::Deserializer as _;
-        Ok(match raw.deserialize_any(RawVectorsVisitor)? {
+        Ok(match raw.deserialize_any(RawVectorsVisitor).map_err(RawVectorsError::Parsing)?? {
            RawVectorsVisitorValue::ImplicitNone => RawVectors::ImplicitlyUserProvided(None),
            RawVectorsVisitorValue::Implicit => RawVectors::ImplicitlyUserProvided(Some(raw)),
            RawVectorsVisitorValue::Explicit { regenerate, embeddings } => {
@ -41,7 +85,7 @@ enum RawVectorsVisitorValue<'doc> {
 }
 impl<'doc> serde::de::Visitor<'doc> for RawVectorsVisitor {
-    type Value = RawVectorsVisitorValue<'doc>;
+    type Value = std::result::Result<RawVectorsVisitorValue<'doc>, RawVectorsError>;
    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(formatter, "a map containing at least `regenerate`, or an array of floats`")
@ -51,7 +95,7 @@ impl<'doc> serde::de::Visitor<'doc> for RawVectorsVisitor {
    where
        E: serde::de::Error,
    {
-        Ok(RawVectorsVisitorValue::ImplicitNone)
+        Ok(Ok(RawVectorsVisitorValue::ImplicitNone))
    }
    fn visit_some<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
@ -65,42 +109,150 @@ impl<'doc> serde::de::Visitor<'doc> for RawVectorsVisitor {
    where
        E: serde::de::Error,
    {
-        Ok(RawVectorsVisitorValue::ImplicitNone)
+        Ok(Ok(RawVectorsVisitorValue::ImplicitNone))
    }
    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
    where
        A: serde::de::SeqAccess<'doc>,
    {
        let mut index = 0;
        // must consume all elements or parsing fails
-        while let Some(_) = seq.next_element::<&RawValue>()? {}
+        loop {
-        Ok(RawVectorsVisitorValue::Implicit)
+            match seq.next_element::<&RawValue>() {
                Ok(Some(_)) => index += 1,
                Err(error) => {
                    return Ok(Err(RawVectorsError::DeserializeSeq {
                        index,
                        error: error.to_string(),
                    }))
                }
                Ok(None) => break,
            };
        }
        Ok(Ok(RawVectorsVisitorValue::Implicit))
    }
    fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
    where
        A: serde::de::MapAccess<'doc>,
    {
        use serde::de::Error as _;
        let mut regenerate = None;
        let mut embeddings = None;
-        while let Some(s) = map.next_key()? {
+        loop {
-            match s {
+            match map.next_key::<&str>() {
-                "regenerate" => {
+                Ok(Some("regenerate")) => {
-                    let value: bool = map.next_value()?;
+                    let value: bool = match map.next_value() {
                        Ok(value) => value,
                        Err(error) => {
                            return Ok(Err(RawVectorsError::DeserializeRegenerate {
                                error: error.to_string(),
                            }))
                        }
                    };
                    regenerate = Some(value);
                }
-                "embeddings" => {
+                Ok(Some("embeddings")) => {
-                    let value: &RawValue = map.next_value()?;
+                    let value: &RawValue = match map.next_value() {
                        Ok(value) => value,
                        Err(error) => {
                            return Ok(Err(RawVectorsError::DeserializeEmbeddings {
                                error: error.to_string(),
                            }))
                        }
                    };
                    embeddings = Some(value);
                }
-                other => return Err(A::Error::unknown_field(other, &["regenerate", "embeddings"])),
+                Ok(Some(other)) => {
                    return Ok(Err(RawVectorsError::UnknownField { field: other.to_string() }))
                }
                Ok(None) => break,
                Err(error) => {
                    return Ok(Err(RawVectorsError::DeserializeKey { error: error.to_string() }))
                }
            }
        }
        let Some(regenerate) = regenerate else {
-            return Err(A::Error::missing_field("regenerate"));
+            return Ok(Err(RawVectorsError::MissingRegenerate));
        };
-        Ok(RawVectorsVisitorValue::Explicit { regenerate, embeddings })
+        Ok(Ok(RawVectorsVisitorValue::Explicit { regenerate, embeddings }))
    }
    /// Handles a boolean found where vectors were expected.
    ///
    /// The visit itself succeeds (outer `Ok`); the problem is reported through the
    /// inner `Err` as a `RawVectorsError::WrongKind` with kind `"boolean"` and the
    /// boolean rendered as text.
    fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let value = v.to_string();
        Ok(Err(RawVectorsError::WrongKind { kind: "boolean", value }))
    }
    /// Handles a signed 64-bit integer found where vectors were expected.
    ///
    /// Returns `Ok(Err(..))`: the visit succeeds, and the inner error is a
    /// `RawVectorsError::WrongKind` with kind `"integer"` and the number as text.
    fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let value = v.to_string();
        Ok(Err(RawVectorsError::WrongKind { kind: "integer", value }))
    }
    /// Handles a signed 128-bit integer found where vectors were expected.
    ///
    /// Returns `Ok(Err(..))`: the visit succeeds, and the inner error is a
    /// `RawVectorsError::WrongKind` with kind `"integer"` and the number as text.
    fn visit_i128<E>(self, v: i128) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let value = v.to_string();
        Ok(Err(RawVectorsError::WrongKind { kind: "integer", value }))
    }
    /// Handles an unsigned 64-bit integer found where vectors were expected.
    ///
    /// Returns `Ok(Err(..))`: the visit succeeds, and the inner error is a
    /// `RawVectorsError::WrongKind` with kind `"integer"` and the number as text.
    fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let value = v.to_string();
        Ok(Err(RawVectorsError::WrongKind { kind: "integer", value }))
    }
    /// Handles an unsigned 128-bit integer found where vectors were expected.
    ///
    /// Returns `Ok(Err(..))`: the visit succeeds, and the inner error is a
    /// `RawVectorsError::WrongKind` with kind `"integer"` and the number as text.
    fn visit_u128<E>(self, v: u128) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let value = v.to_string();
        Ok(Err(RawVectorsError::WrongKind { kind: "integer", value }))
    }
    /// Handles a lone floating-point number found where vectors were expected.
    ///
    /// Returns `Ok(Err(..))`: the visit succeeds, and the inner error is a
    /// `RawVectorsError::WrongKind` with kind `"number"` and the float as text.
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let value = v.to_string();
        Ok(Err(RawVectorsError::WrongKind { kind: "number", value }))
    }
    /// Handles a borrowed string found where vectors were expected.
    ///
    /// Returns `Ok(Err(..))`: the visit succeeds, and the inner error is a
    /// `RawVectorsError::WrongKind` with kind `"string"` and an owned copy of the
    /// string.
    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let value = v.to_string();
        Ok(Err(RawVectorsError::WrongKind { kind: "string", value }))
    }
    /// Handles an owned string found where vectors were expected.
    ///
    /// Returns `Ok(Err(..))`: the visit succeeds, and the inner error is a
    /// `RawVectorsError::WrongKind` with kind `"string"`. The string is moved into
    /// the error rather than copied.
    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let wrong_kind = RawVectorsError::WrongKind { kind: "string", value: v };
        Ok(Err(wrong_kind))
    }
    /// Handles a byte slice found where vectors were expected.
    ///
    /// Returns `Ok(Err(..))`: the visit succeeds, and the inner error is a
    /// `RawVectorsError::WrongKind` with kind `"bytes"` and the `Debug` rendering
    /// of the slice as its value.
    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
    where
        E: serde::de::Error,
    {
        let value = format!("{v:?}");
        Ok(Err(RawVectorsError::WrongKind { kind: "bytes", value }))
    }
    /// Handles a newtype wrapper by visiting its inner value with this same
    /// visitor, so the wrapped content is classified like any other input.
    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: serde::Deserializer<'doc>,
    {
        // Fully-qualified form of `deserializer.deserialize_any(self)`.
        serde::Deserializer::deserialize_any(deserializer, self)
    }
    /// Handles an enum found where vectors were expected.
    ///
    /// The enum data is ignored. Returns `Ok(Err(..))`: the visit succeeds, and
    /// the inner error is a `RawVectorsError::WrongKind` with kind `"enum"` and
    /// the fixed placeholder value `"a variant"`.
    fn visit_enum<A>(self, _data: A) -> Result<Self::Value, A::Error>
    where
        A: serde::de::EnumAccess<'doc>,
    {
        let value = String::from("a variant");
        Ok(Err(RawVectorsError::WrongKind { kind: "enum", value }))
    }
 }
@ -343,7 +495,7 @@ impl Error {
            Error::InvalidEmbedderConf { error } => {
                crate::Error::UserError(UserError::InvalidVectorsEmbedderConf {
                    document_id,
-                    error,
+                    error: error.to_string(),
                })
            }
            Error::InternalSerdeJson(error) => {
Author	SHA1	Message	Date
Louis Dureuil	c890bd2cdf	Merge `a01bc7b454` into `94fb55bb6f`	2024-11-13 12:21:41 +01:00
meili-bors[bot]	94fb55bb6f	Merge #5049 Some checks failed Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run Details Test suite / Tests on ubuntu-20.04 (push) Failing after 59s Details Test suite / Tests almost all features (push) Has been skipped Details Test suite / Test disabled tokenization (push) Has been skipped Details Test suite / Run tests in debug (push) Failing after 13s Details Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 7m4s Details Test suite / Run Clippy (push) Successful in 10m58s Details Test suite / Run Rustfmt (push) Successful in 2m34s Details Run the indexing fuzzer / Setup the action (push) Successful in 1h5m58s Details Indexing bench (push) / Run and upload benchmarks (push) Has been cancelled Details Benchmarks of indexing (push) / Run and upload benchmarks (push) Has been cancelled Details Benchmarks of search for geo (push) / Run and upload benchmarks (push) Has been cancelled Details Benchmarks of search for songs (push) / Run and upload benchmarks (push) Has been cancelled Details Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Has been cancelled Details 5049: Fix the path used in the flaky tests CI r=irevoire a=Kerollmops This PR fixes [the flaky tests CI](https://github.com/meilisearch/meilisearch/actions/runs/11741717787) path used. Co-authored-by: Clément Renault <clement@meilisearch.com>	2024-11-13 10:26:50 +00:00
Louis Dureuil	a01bc7b454	Fix error_document_field_limit_reached_in_one_document test	2024-11-13 10:34:54 +01:00
Louis Dureuil	7accfea624	Don't short circuit when we encounter a semantic error while extracting fields and external docid	2024-11-13 10:33:59 +01:00
Clément Renault	009709eace	Fix the path used in the flaky tests CI	2024-11-13 09:52:10 +01:00
Louis Dureuil	82dcaba6ca	Fix test: somehow on main vectors where displayed even though retrieveVectors: false	2024-11-12 23:58:25 +01:00
Louis Dureuil	cb1d6613dd	Adjust snapshots	2024-11-12 23:26:30 +01:00
Louis Dureuil	3b0cb5b487	Fix vector error messages	2024-11-12 23:26:16 +01:00
Louis Dureuil	bfdcd1cf33	Space changes	2024-11-12 22:52:45 +01:00
Louis Dureuil	1d13e804f7	Adjust test snapshots	2024-11-12 22:52:41 +01:00
Louis Dureuil	c4e9f761e9	Emit better error messages when parsing vectors	2024-11-12 22:49:22 +01:00
Louis Dureuil	8a6e61c77f	InvalidVectorsEmbedderConf error takes a String rather than a deserr error	2024-11-12 22:47:57 +01:00
meili-bors[bot]	a5d7ae23bd	Merge #5044 5044: Adds new metrics to prometheus r=irevoire a=PedroTurik not 100% confident in this solution, especially because i couldn't make the "Search Queue searches waiting" metric give me any value other than 0 with my local testing 😆. But i believe it solves the Issue. # Pull Request ## Related issue Fixes #4998 ## What does this PR do? ### Adds new metrics to prometheus; - SearchQueue size, - SearchQueue searches running, - and Search Queue searches waiting. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Co-authored-by: Pedro Turik Firmino <pedroturik@gmail.com>	2024-11-07 17:05:43 +00:00
PedroTurik	03886d0012	Applies optimizations to formatted integration tests (#5043 )	2024-11-07 15:58:55 +01:00
meili-bors[bot]	b427b9e88f	Merge #5025 5025: test: improve performance of get_documents.rs r=irevoire a=PedroTurik # Pull Request ## Related issue Fixes one item from #4840 ## What does this PR do? - Applies the changes recommended on the issue for `meilisearch/tests/documents/get_documents.rs` ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Pedro Turik Firmino <pedroturik@gmail.com>	2024-11-07 09:46:34 +00:00
Pedro Turik Firmino	8b95f5ccc6	Adds new metrics to prometheus: SearchQueue size, SearchQueue searches running, and Search Queue searches waiting.	2024-11-06 15:37:16 -03:00
Pedro Turik Firmino	da59a043ba	Fixes formatting issues	2024-11-06 09:55:48 -03:00
Pedro Turik Firmino	da4d47b5d0	Fixes formatting issues	2024-11-06 09:54:20 -03:00
Pedro Turik Firmino	d0b1ba20cb	Improves usage of shared indexes	2024-11-04 17:26:50 -03:00
Pedro Turik Firmino	c79ca9679b	Changes variable name to re-run CI	2024-11-02 18:25:33 -03:00
Pedro Turik Firmino	a934b0ac6a	Applies optimizations to some integration tests	2024-10-29 18:49:06 -03:00