From 5d50850e12f72a07221184c7d9962f511a6dc791 Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Mon, 3 Jun 2024 16:04:14 +0200
Subject: [PATCH] Always push the user-defined vectors in arroy

---
 index-scheduler/src/lib.rs                    |  14 +-
 ..._scheduler__tests__import_vectors-15.snap} |   4 -
 ..._scheduler__tests__import_vectors-22.snap} |   4 -
 ...x_scheduler__tests__import_vectors-5.snap} |   0
 ...x_scheduler__tests__import_vectors-8.snap} |   0
 ..._scheduler__tests__settings_update-5.snap} |   0
 .../documents after setting an embedder.snap  |   4 -
 meilisearch/tests/search/hybrid.rs            |  40 +--
 meilisearch/tests/search/mod.rs               |   1 +
 meilisearch/tests/similar/mod.rs              | 217 ++++++++--------
 ...__attribute_fid__attribute_fid_ngrams.snap | 244 ------------------
 .../1/field_distribution.snap                 |   7 -
 .../field_distribution.snap                   |   7 -
 .../extract/extract_vector_points.rs          |  75 +++---
 milli/src/vector/parsed_vectors.rs            |  22 +-
 15 files changed, 189 insertions(+), 450 deletions(-)
 rename index-scheduler/src/snapshots/{index_scheduler__tests__import_vectors-9.snap => index_scheduler__tests__import_vectors-15.snap} (67%)
 rename index-scheduler/src/snapshots/{index_scheduler__tests__import_vectors-12.snap => index_scheduler__tests__import_vectors-22.snap} (67%)
 rename index-scheduler/src/snapshots/{index_scheduler__tests__import_vectors-4.snap => index_scheduler__tests__import_vectors-5.snap} (100%)
 rename index-scheduler/src/snapshots/{index_scheduler__tests__import_vectors-6.snap => index_scheduler__tests__import_vectors-8.snap} (100%)
 rename index-scheduler/src/snapshots/{index_scheduler__tests__settings_update-3.snap => index_scheduler__tests__settings_update-5.snap} (100%)
 delete mode 100644 index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after setting an embedder.snap
 delete mode 100644 milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams.snap
 delete mode 100644 milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap
 delete mode 100644 milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap

diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index d007acd2c..f69736297 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -5173,8 +5173,8 @@ mod tests {
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir");
 
         println!("HEEEEERE");
-        // handle.advance_one_successful_batch();
-        handle.advance_one_failed_batch();
+        handle.advance_one_successful_batch();
+        // handle.advance_one_failed_batch();
         snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds");
 
         {
@@ -5351,9 +5351,9 @@ mod tests {
         // as user provided since we explicitely marked it as NOT user provided.
         snapshot!(format!("{conf:#?}"), @r###"
         [
-            (
-                "my_doggo_embedder",
-                EmbeddingConfig {
+            IndexEmbeddingConfig {
+                name: "my_doggo_embedder",
+                config: EmbeddingConfig {
                     embedder_options: HuggingFace(
                         EmbedderOptions {
                             model: "sentence-transformers/all-MiniLM-L6-v2",
@@ -5367,8 +5367,8 @@ mod tests {
                         template: "{{doc.doggo}}",
                     },
                 },
-                RoaringBitmap<[1, 2]>,
-            ),
+                user_defined: RoaringBitmap<[1, 2]>,
+            },
         ]
         "###);
         let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-9.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap
similarity index 67%
rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-9.snap
rename to index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap
index 002a42e59..540835dfb 100644
--- a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-9.snap
+++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-15.snap
@@ -6,10 +6,6 @@ expression: doc
   "doggo": "Intel",
   "breed": "beagle",
   "_vectors": {
-    "A_fakerest": {
-      "embeddings": "[vector]",
-      "userProvided": true
-    },
     "noise": [
       0.1,
       0.2,
diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-12.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap
similarity index 67%
rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-12.snap
rename to index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap
index 718ea229c..bc35d84f6 100644
--- a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-12.snap
+++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-22.snap
@@ -6,10 +6,6 @@ expression: doc
   "doggo": "kefir",
   "breed": "patou",
   "_vectors": {
-    "A_fakerest": {
-      "embeddings": "[vector]",
-      "userProvided": true
-    },
     "noise": [
       0.1,
       0.2,
diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-4.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap
similarity index 100%
rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-4.snap
rename to index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap
diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-6.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap
similarity index 100%
rename from index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-6.snap
rename to index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-8.snap
diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-3.snap b/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap
similarity index 100%
rename from index-scheduler/src/snapshots/index_scheduler__tests__settings_update-3.snap
rename to index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap
diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after setting an embedder.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after setting an embedder.snap
deleted file mode 100644
index 853be8b0a..000000000
--- a/index-scheduler/src/snapshots/lib.rs/import_vectors_first_and_embedder_later/documents after setting an embedder.snap	
+++ /dev/null
@@ -1,4 +0,0 @@
----
-source: index-scheduler/src/lib.rs
----
-[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1.0,2.0,3.0]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4.0,5.0]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]
diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs
index 0c8b4534c..1e415bc63 100644
--- a/meilisearch/tests/search/hybrid.rs
+++ b/meilisearch/tests/search/hybrid.rs
@@ -204,7 +204,7 @@ async fn distribution_shift() {
     let server = Server::new().await;
     let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
 
-    let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}});
+    let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}, "retrieveVectors": true});
     let (response, code) = index.search_post(search.clone()).await;
     snapshot!(code, @"200 OK");
     snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###);
@@ -239,20 +239,23 @@ async fn highlighter() {
     let (response, code) = index
         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
             "hybrid": {"semanticRatio": 0.2},
-            "attributesToHighlight": [
-                     "desc"
+           "retrieveVectors": true,
+           "attributesToHighlight": [
+                     "desc",
+                     "_vectors",
                    ],
-                   "highlightPreTag": "**BEGIN**",
-                   "highlightPostTag": "**END**"
+           "highlightPreTag": "**BEGIN**",
+           "highlightPostTag": "**END**",
         }))
         .await;
     snapshot!(code, @"200 OK");
-    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
+    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###);
     snapshot!(response["semanticHitCount"], @"0");
 
     let (response, code) = index
         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
             "hybrid": {"semanticRatio": 0.8},
+            "retrieveVectors": true,
             "showRankingScore": true,
             "attributesToHighlight": [
                      "desc"
@@ -262,13 +265,14 @@ async fn highlighter() {
         }))
         .await;
     snapshot!(code, @"200 OK");
-    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
+    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
     snapshot!(response["semanticHitCount"], @"3");
 
     // no highlighting on full semantic
     let (response, code) = index
         .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
             "hybrid": {"semanticRatio": 1.0},
+            "retrieveVectors": true,
             "showRankingScore": true,
             "attributesToHighlight": [
                      "desc"
@@ -278,7 +282,7 @@ async fn highlighter() {
         }))
         .await;
     snapshot!(code, @"200 OK");
-    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_rankingScore":0.9472135901451112}]"###);
+    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
     snapshot!(response["semanticHitCount"], @"3");
 }
 
@@ -361,7 +365,7 @@ async fn single_document() {
 
     let (response, code) = index
     .search_post(
-        json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
+        json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
     )
     .await;
 
@@ -377,7 +381,7 @@ async fn query_combination() {
 
     // search without query and vector, but with hybrid => still placeholder
     let (response, code) = index
-        .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
+        .search_post(json!({"hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
         .await;
 
     snapshot!(code, @"200 OK");
@@ -386,7 +390,7 @@ async fn query_combination() {
 
     // same with a different semantic ratio
     let (response, code) = index
-        .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true}))
+        .search_post(json!({"hybrid": {"semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
         .await;
 
     snapshot!(code, @"200 OK");
@@ -395,7 +399,7 @@ async fn query_combination() {
 
     // wrong vector dimensions
     let (response, code) = index
-    .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
+    .search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
     .await;
 
     snapshot!(code, @"400 Bad Request");
@@ -410,7 +414,7 @@ async fn query_combination() {
 
     // full vector
     let (response, code) = index
-    .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}))
+    .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
     .await;
 
     snapshot!(code, @"200 OK");
@@ -419,7 +423,7 @@ async fn query_combination() {
 
     // full keyword, without a query
     let (response, code) = index
-    .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
+    .search_post(json!({"vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
     .await;
 
     snapshot!(code, @"200 OK");
@@ -428,7 +432,7 @@ async fn query_combination() {
 
     // query + vector, full keyword => keyword
     let (response, code) = index
-    .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true}))
+    .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
     .await;
 
     snapshot!(code, @"200 OK");
@@ -437,7 +441,7 @@ async fn query_combination() {
 
     // query + vector, no hybrid keyword =>
     let (response, code) = index
-        .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true}))
+        .search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true}))
         .await;
 
     snapshot!(code, @"400 Bad Request");
@@ -453,7 +457,7 @@ async fn query_combination() {
     // full vector, without a vector => error
     let (response, code) = index
         .search_post(
-            json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
+            json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;
 
@@ -470,7 +474,7 @@ async fn query_combination() {
     // hybrid without a vector => full keyword
     let (response, code) = index
         .search_post(
-            json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true}),
+            json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99}, "showRankingScore": true, "retrieveVectors": true}),
         )
         .await;
 
diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs
index b65c0dc42..955b324a6 100644
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -1337,6 +1337,7 @@ async fn experimental_feature_vector_store() {
         .search_post(json!({
             "vector": [1.0, 2.0, 3.0],
             "showRankingScore": true,
+            "retrieveVectors": true,
         }))
         .await;
 
diff --git a/meilisearch/tests/similar/mod.rs b/meilisearch/tests/similar/mod.rs
index a2378eb58..f2af91588 100644
--- a/meilisearch/tests/similar/mod.rs
+++ b/meilisearch/tests/similar/mod.rs
@@ -78,7 +78,7 @@ async fn basic() {
     index.wait_task(value.uid()).await;
 
     index
-        .similar(json!({"id": 143}), |response, code| {
+        .similar(json!({"id": 143, "retrieveVectors": true}), |response, code| {
             snapshot!(code, @"200 OK");
             snapshot!(json_string!(response["hits"]), @r###"
             [
@@ -88,9 +88,9 @@ async fn basic() {
                 "id": "522681",
                 "_vectors": {
                   "manual": [
-                    0.1,
-                    0.6,
-                    0.8
+                    0.10000000149011612,
+                    0.6000000238418579,
+                    0.800000011920929
                   ]
                 }
               },
@@ -100,9 +100,9 @@ async fn basic() {
                 "id": "299537",
                 "_vectors": {
                   "manual": [
-                    0.6,
-                    0.8,
-                    -0.2
+                    0.6000000238418579,
+                    0.800000011920929,
+                    -0.20000000298023224
                   ]
                 }
               },
@@ -112,9 +112,9 @@ async fn basic() {
                 "id": "166428",
                 "_vectors": {
                   "manual": [
-                    0.7,
-                    0.7,
-                    -0.4
+                    0.699999988079071,
+                    0.699999988079071,
+                    -0.4000000059604645
                   ]
                 }
               },
@@ -124,8 +124,8 @@ async fn basic() {
                 "id": "287947",
                 "_vectors": {
                   "manual": [
-                    0.8,
-                    0.4,
+                    0.800000011920929,
+                    0.4000000059604645,
                     -0.5
                   ]
                 }
@@ -136,7 +136,7 @@ async fn basic() {
         .await;
 
     index
-        .similar(json!({"id": "299537"}), |response, code| {
+        .similar(json!({"id": "299537", "retrieveVectors": true}), |response, code| {
             snapshot!(code, @"200 OK");
             snapshot!(json_string!(response["hits"]), @r###"
             [
@@ -146,9 +146,9 @@ async fn basic() {
                 "id": "166428",
                 "_vectors": {
                   "manual": [
-                    0.7,
-                    0.7,
-                    -0.4
+                    0.699999988079071,
+                    0.699999988079071,
+                    -0.4000000059604645
                   ]
                 }
               },
@@ -158,8 +158,8 @@ async fn basic() {
                 "id": "287947",
                 "_vectors": {
                   "manual": [
-                    0.8,
-                    0.4,
+                    0.800000011920929,
+                    0.4000000059604645,
                     -0.5
                   ]
                 }
@@ -170,9 +170,9 @@ async fn basic() {
                 "id": "522681",
                 "_vectors": {
                   "manual": [
-                    0.1,
-                    0.6,
-                    0.8
+                    0.10000000149011612,
+                    0.6000000238418579,
+                    0.800000011920929
                   ]
                 }
               },
@@ -183,8 +183,8 @@ async fn basic() {
                 "_vectors": {
                   "manual": [
                     -0.5,
-                    0.3,
-                    0.85
+                    0.30000001192092896,
+                    0.8500000238418579
                   ]
                 }
               }
@@ -456,71 +456,77 @@ async fn filter() {
     index.wait_task(value.uid()).await;
 
     index
-        .similar(json!({"id": 522681, "filter": "release_year = 2019"}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "title": "Captain Marvel",
-                "release_year": 2019,
-                "id": "299537",
-                "_vectors": {
-                  "manual": [
-                    0.6,
-                    0.8,
-                    -0.2
-                  ]
-                }
-              },
-              {
-                "title": "How to Train Your Dragon: The Hidden World",
-                "release_year": 2019,
-                "id": "166428",
-                "_vectors": {
-                  "manual": [
-                    0.7,
-                    0.7,
-                    -0.4
-                  ]
-                }
-              },
-              {
-                "title": "Shazam!",
-                "release_year": 2019,
-                "id": "287947",
-                "_vectors": {
-                  "manual": [
-                    0.8,
-                    0.4,
-                    -0.5
-                  ]
-                }
-              }
-            ]
-            "###);
-        })
+        .similar(
+            json!({"id": 522681, "filter": "release_year = 2019", "retrieveVectors": true}),
+            |response, code| {
+                snapshot!(code, @"200 OK");
+                snapshot!(json_string!(response["hits"]), @r###"
+                [
+                  {
+                    "title": "Captain Marvel",
+                    "release_year": 2019,
+                    "id": "299537",
+                    "_vectors": {
+                      "manual": [
+                        0.6000000238418579,
+                        0.800000011920929,
+                        -0.20000000298023224
+                      ]
+                    }
+                  },
+                  {
+                    "title": "How to Train Your Dragon: The Hidden World",
+                    "release_year": 2019,
+                    "id": "166428",
+                    "_vectors": {
+                      "manual": [
+                        0.699999988079071,
+                        0.699999988079071,
+                        -0.4000000059604645
+                      ]
+                    }
+                  },
+                  {
+                    "title": "Shazam!",
+                    "release_year": 2019,
+                    "id": "287947",
+                    "_vectors": {
+                      "manual": [
+                        0.800000011920929,
+                        0.4000000059604645,
+                        -0.5
+                      ]
+                    }
+                  }
+                ]
+                "###);
+            },
+        )
         .await;
 
     index
-        .similar(json!({"id": 522681, "filter": "release_year < 2000"}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "title": "All Quiet on the Western Front",
-                "release_year": 1930,
-                "id": "143",
-                "_vectors": {
-                  "manual": [
-                    -0.5,
-                    0.3,
-                    0.85
-                  ]
-                }
-              }
-            ]
-            "###);
-        })
+        .similar(
+            json!({"id": 522681, "filter": "release_year < 2000", "retrieveVectors": true}),
+            |response, code| {
+                snapshot!(code, @"200 OK");
+                snapshot!(json_string!(response["hits"]), @r###"
+                [
+                  {
+                    "title": "All Quiet on the Western Front",
+                    "release_year": 1930,
+                    "id": "143",
+                    "_vectors": {
+                      "manual": [
+                        -0.5,
+                        0.30000001192092896,
+                        0.8500000238418579
+                      ]
+                    }
+                  }
+                ]
+                "###);
+            },
+        )
         .await;
 }
 
@@ -579,24 +585,27 @@ async fn limit_and_offset() {
         .await;
 
     index
-        .similar(json!({"id": 143, "limit": 1, "offset": 1}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "title": "Captain Marvel",
-                "release_year": 2019,
-                "id": "299537",
-                "_vectors": {
-                  "manual": [
-                    0.6,
-                    0.8,
-                    -0.2
-                  ]
-                }
-              }
-            ]
-            "###);
-        })
+        .similar(
+            json!({"id": 143, "limit": 1, "offset": 1, "retrieveVectors": true}),
+            |response, code| {
+                snapshot!(code, @"200 OK");
+                snapshot!(json_string!(response["hits"]), @r###"
+                [
+                  {
+                    "title": "Captain Marvel",
+                    "release_year": 2019,
+                    "id": "299537",
+                    "_vectors": {
+                      "manual": [
+                        0.6000000238418579,
+                        0.800000011920929,
+                        -0.20000000298023224
+                      ]
+                    }
+                  }
+                ]
+                "###);
+            },
+        )
         .await;
 }
diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams.snap
deleted file mode 100644
index 930a21626..000000000
--- a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams.snap
+++ /dev/null
@@ -1,244 +0,0 @@
----
-source: milli/src/search/new/tests/attribute_fid.rs
-expression: "format!(\"{document_ids_scores:#?}\")"
----
-[
-    (
-        2,
-        [
-            Fid(
-                Rank {
-                    rank: 19,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 91,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        6,
-        [
-            Fid(
-                Rank {
-                    rank: 15,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 81,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        5,
-        [
-            Fid(
-                Rank {
-                    rank: 14,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 79,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        4,
-        [
-            Fid(
-                Rank {
-                    rank: 13,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 77,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        3,
-        [
-            Fid(
-                Rank {
-                    rank: 12,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 83,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        9,
-        [
-            Fid(
-                Rank {
-                    rank: 11,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 75,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        8,
-        [
-            Fid(
-                Rank {
-                    rank: 10,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 79,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        7,
-        [
-            Fid(
-                Rank {
-                    rank: 10,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 73,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        11,
-        [
-            Fid(
-                Rank {
-                    rank: 7,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 77,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        10,
-        [
-            Fid(
-                Rank {
-                    rank: 6,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 81,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        13,
-        [
-            Fid(
-                Rank {
-                    rank: 6,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 81,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        12,
-        [
-            Fid(
-                Rank {
-                    rank: 6,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 78,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        14,
-        [
-            Fid(
-                Rank {
-                    rank: 5,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 75,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-    (
-        0,
-        [
-            Fid(
-                Rank {
-                    rank: 1,
-                    max_rank: 19,
-                },
-            ),
-            Position(
-                Rank {
-                    rank: 91,
-                    max_rank: 91,
-                },
-            ),
-        ],
-    ),
-]
diff --git a/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap b/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap
deleted file mode 100644
index 1d1d629e6..000000000
--- a/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap
+++ /dev/null
@@ -1,7 +0,0 @@
----
-source: milli/src/index.rs
----
-age              1      |
-id               2      |
-name             2      |
-
diff --git a/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap b/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap
deleted file mode 100644
index 1d1d629e6..000000000
--- a/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap
+++ /dev/null
@@ -1,7 +0,0 @@
----
-source: milli/src/index.rs
----
-age              1      |
-id               2      |
-name             2      |
-
diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs
index 3eb761bce..1e56bec83 100644
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@@ -8,7 +8,6 @@ use std::sync::Arc;
 
 use bytemuck::cast_slice;
 use grenad::Writer;
-use itertools::EitherOrBoth;
 use ordered_float::OrderedFloat;
 use roaring::RoaringBitmap;
 use serde_json::Value;
@@ -50,7 +49,7 @@ enum VectorStateDelta {
     // Note: changing the value of the manually specified vector **should not record** this delta
     WasGeneratedNowManual(Vec<Vec<f32>>),
 
-    ManualDelta(Vec<Vec<f32>>, Vec<Vec<f32>>),
+    ManualDelta(Vec<Vec<f32>>),
 
     // Add the vector computed from the specified prompt
     // Remove any previous vector
@@ -59,14 +58,12 @@ enum VectorStateDelta {
 }
 
 impl VectorStateDelta {
-    fn into_values(self) -> (bool, String, (Vec<Vec<f32>>, Vec<Vec<f32>>)) {
+    fn into_values(self) -> (bool, String, Vec<Vec<f32>>) {
         match self {
             VectorStateDelta::NoChange => Default::default(),
             VectorStateDelta::NowRemoved => (true, Default::default(), Default::default()),
-            VectorStateDelta::WasGeneratedNowManual(add) => {
-                (true, Default::default(), (Default::default(), add))
-            }
-            VectorStateDelta::ManualDelta(del, add) => (false, Default::default(), (del, add)),
+            VectorStateDelta::WasGeneratedNowManual(add) => (true, Default::default(), add),
+            VectorStateDelta::ManualDelta(add) => (false, Default::default(), add),
             VectorStateDelta::NowGenerated(prompt) => (true, prompt, Default::default()),
         }
     }
@@ -166,8 +163,14 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
         // lazily get it when needed
         let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() };
 
-        let mut parsed_vectors = ParsedVectorsDiff::new(obkv, old_vectors_fid, new_vectors_fid)
-            .map_err(|error| error.to_crate_error(document_id().to_string()))?;
+        let mut parsed_vectors = ParsedVectorsDiff::new(
+            docid,
+            embedders_configs,
+            obkv,
+            old_vectors_fid,
+            new_vectors_fid,
+        )
+        .map_err(|error| error.to_crate_error(document_id().to_string()))?;
 
         for EmbedderVectorExtractor {
             embedder_name,
@@ -182,7 +185,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
         {
             let delta = match parsed_vectors.remove(embedder_name) {
                 (Some(old), Some(new)) => {
-                    match (old.is_user_provided(), new.is_user_provided()) {
+                    match (old.map_or(true, |old| old.is_user_provided()), new.is_user_provided()) {
                         (true, true) | (false, false) => (),
                         (true, false) => {
                             remove_from_user_defined.insert(docid);
@@ -193,7 +196,6 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                     }
 
                     // no autogeneration
-                    let del_vectors = old.into_array_of_vectors();
                     let add_vectors = new.into_array_of_vectors();
 
                     if add_vectors.len() > usize::from(u8::MAX) {
@@ -203,15 +205,15 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                         )));
                     }
 
-                    VectorStateDelta::ManualDelta(del_vectors, add_vectors)
+                    VectorStateDelta::ManualDelta(add_vectors)
                 }
-                (Some(_old), None) => {
+                (Some(old), None) => {
                     // Do we keep this document?
                     let document_is_kept = obkv
                         .iter()
                         .map(|(_, deladd)| KvReaderDelAdd::new(deladd))
                         .any(|deladd| deladd.get(DelAdd::Addition).is_some());
-                    if document_is_kept {
+                    if document_is_kept && old.is_some() {
                         remove_from_user_defined.insert(docid);
                         // becomes autogenerated
                         VectorStateDelta::NowGenerated(prompt.render(
@@ -219,6 +221,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                             DelAdd::Addition,
                             new_fields_ids_map,
                         )?)
+                    } else if document_is_kept && old.is_none() {
+                        VectorStateDelta::NoChange
                     } else {
                         VectorStateDelta::NowRemoved
                     }
@@ -315,8 +319,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
     Ok(results)
 }
 
-/// Computes the diff between both Del and Add numbers and
-/// only inserts the parts that differ in the sorter.
+/// We cannot compute the diff between both Del and Add vectors.
+/// We'll push every vector and compute the difference later in TypedChunk.
 fn push_vectors_diff(
     remove_vectors_writer: &mut Writer<BufWriter<File>>,
     prompts_writer: &mut Writer<BufWriter<File>>,
@@ -325,7 +329,7 @@ fn push_vectors_diff(
     delta: VectorStateDelta,
     reindex_vectors: bool,
 ) -> Result<()> {
-    let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values();
+    let (must_remove, prompt, mut add_vectors) = delta.into_values();
     if must_remove
     // TODO: the below condition works because we erase the vec database when a embedding setting changes.
     // When vector pipeline will be optimized, this should be removed.
@@ -340,44 +344,25 @@ fn push_vectors_diff(
     }
 
     // We sort and dedup the vectors
-    del_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
     add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
-    del_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
     add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
 
-    let merged_vectors_iter =
-        itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
+    // let merged_vectors_iter =
+    //     itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
 
     // insert vectors into the writer
-    for (i, eob) in merged_vectors_iter.into_iter().enumerate().take(u16::MAX as usize) {
+    for (i, vector) in add_vectors.into_iter().enumerate().take(u16::MAX as usize) {
         // Generate the key by extending the unique index to it.
         key_buffer.truncate(TRUNCATE_SIZE);
         let index = u16::try_from(i).unwrap();
         key_buffer.extend_from_slice(&index.to_be_bytes());
 
-        match eob {
-            EitherOrBoth::Both(_, _) => (), // no need to touch anything
-            EitherOrBoth::Left(vector) => {
-                // TODO: the below condition works because we erase the vec database when a embedding setting changes.
-                // When vector pipeline will be optimized, this should be removed.
-                if !reindex_vectors {
-                    // We insert only the Del part of the Obkv to inform
-                    // that we only want to remove all those vectors.
-                    let mut obkv = KvWriterDelAdd::memory();
-                    obkv.insert(DelAdd::Deletion, cast_slice(&vector))?;
-                    let bytes = obkv.into_inner()?;
-                    manual_vectors_writer.insert(&key_buffer, bytes)?;
-                }
-            }
-            EitherOrBoth::Right(vector) => {
-                // We insert only the Add part of the Obkv to inform
-                // that we only want to remove all those vectors.
-                let mut obkv = KvWriterDelAdd::memory();
-                obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
-                let bytes = obkv.into_inner()?;
-                manual_vectors_writer.insert(&key_buffer, bytes)?;
-            }
-        }
+        // We insert only the Add part of the Obkv to inform
+        // that we only want to add all those vectors.
+        let mut obkv = KvWriterDelAdd::memory();
+        obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
+        let bytes = obkv.into_inner()?;
+        manual_vectors_writer.insert(&key_buffer, bytes)?;
     }
 
     Ok(())
diff --git a/milli/src/vector/parsed_vectors.rs b/milli/src/vector/parsed_vectors.rs
index 62c418149..672e27cc5 100644
--- a/milli/src/vector/parsed_vectors.rs
+++ b/milli/src/vector/parsed_vectors.rs
@@ -4,8 +4,9 @@ use obkv::KvReader;
 use serde_json::{from_slice, Value};
 
 use super::Embedding;
+use crate::index::IndexEmbeddingConfig;
 use crate::update::del_add::{DelAdd, KvReaderDelAdd};
-use crate::{FieldId, InternalError, UserError};
+use crate::{DocumentId, FieldId, InternalError, UserError};
 
 pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
 
@@ -42,17 +43,19 @@ pub struct ExplicitVectors {
 }
 
 pub struct ParsedVectorsDiff {
-    pub old: Option<BTreeMap<String, Vectors>>,
+    pub old: BTreeMap<String, Option<Vectors>>,
     pub new: Option<BTreeMap<String, Vectors>>,
 }
 
 impl ParsedVectorsDiff {
     pub fn new(
+        docid: DocumentId,
+        embedders_configs: &[IndexEmbeddingConfig],
         documents_diff: KvReader<'_, FieldId>,
         old_vectors_fid: Option<FieldId>,
         new_vectors_fid: Option<FieldId>,
     ) -> Result<Self, Error> {
-        let old = match old_vectors_fid
+        let mut old = match old_vectors_fid
             .and_then(|vectors_fid| documents_diff.get(vectors_fid))
             .map(KvReaderDelAdd::new)
             .map(|obkv| to_vector_map(obkv, DelAdd::Deletion))
@@ -68,7 +71,13 @@ impl ParsedVectorsDiff {
                 return Err(error);
             }
         }
-        .flatten();
+        .flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, Some(vec))).collect());
+        for embedding_config in embedders_configs {
+            if embedding_config.user_defined.contains(docid) {
+                old.entry(embedding_config.name.to_string()).or_insert(None);
+            }
+        }
+
         let new = new_vectors_fid
             .and_then(|vectors_fid| documents_diff.get(vectors_fid))
             .map(KvReaderDelAdd::new)
@@ -78,8 +87,9 @@ impl ParsedVectorsDiff {
         Ok(Self { old, new })
     }
 
-    pub fn remove(&mut self, embedder_name: &str) -> (Option<Vectors>, Option<Vectors>) {
-        let old = self.old.as_mut().and_then(|old| old.remove(embedder_name));
+    /// Returns `(Some(None), _)` when the document is flagged as user-defined for this embedder but no old vectors were found in the document itself.
+    pub fn remove(&mut self, embedder_name: &str) -> (Option<Option<Vectors>>, Option<Vectors>) {
+        let old = self.old.remove(embedder_name);
         let new = self.new.as_mut().and_then(|new| new.remove(embedder_name));
         (old, new)
     }