From cf864a1c2ee5bba6e344a0d7aa11f1bbdb75f1ad Mon Sep 17 00:00:00 2001 From: yudrywet Date: Sun, 14 Apr 2024 20:11:34 +0800 Subject: [PATCH 01/56] chore: fix some typos in comments Signed-off-by: yudrywet --- milli/src/search/new/matches/mod.rs | 4 ++-- milli/src/search/new/query_term/parse_query.rs | 2 +- milli/src/update/facet/incremental.rs | 2 +- .../index_documents/extract/extract_fid_docid_facet_values.rs | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 2913f206d..8f0069589 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -134,7 +134,7 @@ impl<'t> Matcher<'t, '_> { for (token_position, word_position, word) in words_positions { partial = match partial.match_token(word) { // token matches the partial match, but the match is not full, - // we temporarly save the current token then we try to match the next one. + // we temporarily save the current token then we try to match the next one. Some(MatchType::Partial(partial)) => { potential_matches.push((token_position, word_position, partial.char_len())); partial @@ -722,7 +722,7 @@ mod tests { @"…void void void void void split the world void void" ); - // Text containing matches with diferent density. + // Text containing matches with different density. let text = "split void the void void world void void void void void void void void void void split the world void void"; let mut matcher = builder.build(text); // crop should return 10 last words with a marker at the start. 
diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index 86be7da77..93f5f081c 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -119,7 +119,7 @@ pub fn located_query_terms_from_tokens( if let Some(located_query_term) = phrase.build(ctx) { // as we are evaluating a negative operator we put the phrase // in the negative one *but* we don't reset the negative operator - // as we are immediatly starting a new negative phrase. + // as we are immediately starting a new negative phrase. if negative_phrase { negative_phrases.push(located_query_term); } else { diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index 798e0fe3d..f871eee31 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -499,7 +499,7 @@ impl FacetsUpdateIncrementalInner { ModificationResult::Expand | ModificationResult::Reduce { .. } ) { - // if any modification occured, insert it in the database. + // if any modification occurred, insert it in the database. self.db.put(txn, &insertion_key.as_ref(), &updated_value)?; Ok(insertion_key_modification) } else { diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 1f8af372d..d88d96919 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -36,7 +36,7 @@ pub struct ExtractedFacetValues { /// Extracts the facet values of each faceted field of each document. 
/// -/// Returns the generated grenad reader containing the docid the fid and the orginal value as key +/// Returns the generated grenad reader containing the docid the fid and the original value as key /// and the normalized value as value extracted from the given chunk of documents. /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially. #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] From ab43a8a9497bf55fb958d9d01b800c2a2fa8362b Mon Sep 17 00:00:00 2001 From: writegr Date: Thu, 18 Apr 2024 14:12:52 +0800 Subject: [PATCH 02/56] chore: fix some typos in comments Signed-off-by: writegr --- filter-parser/src/lib.rs | 2 +- index-scheduler/src/batch.rs | 2 +- meilisearch-types/src/deserr/mod.rs | 2 +- meilitool/src/main.rs | 2 +- milli/src/documents/builder.rs | 2 +- milli/src/search/new/geo_sort.rs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index fa5b70606..6bfbbb024 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -568,7 +568,7 @@ pub mod tests { insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#); insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#); - // but it also works with other sequencies + // but it also works with other sequences insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}"); } diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 3161dc499..bc9823a01 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -13,7 +13,7 @@ We can combine the two tasks in a single batch: 1. 
import documents X and Y Processing this batch is functionally equivalent to processing the two -tasks individally, but should be much faster since we are only performing +tasks individually, but should be much faster since we are only performing one indexing operation. */ diff --git a/meilisearch-types/src/deserr/mod.rs b/meilisearch-types/src/deserr/mod.rs index 537b24574..bf1aa1da5 100644 --- a/meilisearch-types/src/deserr/mod.rs +++ b/meilisearch-types/src/deserr/mod.rs @@ -26,7 +26,7 @@ pub type DeserrQueryParamError = DeserrError { pub msg: String, diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index bace7d16b..bfcbfdd6d 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -129,7 +129,7 @@ fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { } } - eprintln!("Sucessfully deleted {count} content files from disk!"); + eprintln!("Successfully deleted {count} content files from disk!"); Ok(()) } diff --git a/milli/src/documents/builder.rs b/milli/src/documents/builder.rs index e5124f67f..ec4d634aa 100644 --- a/milli/src/documents/builder.rs +++ b/milli/src/documents/builder.rs @@ -203,7 +203,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) { "string" => (field_name, AllowedType::String), "boolean" => (field_name, AllowedType::Boolean), "number" => (field_name, AllowedType::Number), - // if the pattern isn't reconized, we keep the whole field. + // if the pattern isn't recognized, we keep the whole field. _otherwise => (header, AllowedType::String), }, None => (header, AllowedType::String), diff --git a/milli/src/search/new/geo_sort.rs b/milli/src/search/new/geo_sort.rs index 5f5ceb379..4081c9637 100644 --- a/milli/src/search/new/geo_sort.rs +++ b/milli/src/search/new/geo_sort.rs @@ -42,7 +42,7 @@ fn facet_number_values<'a>( } /// Define the strategy used by the geo sort. 
-/// The paramater represents the cache size, and, in the case of the Dynamic strategy, +/// The parameter represents the cache size, and, in the case of the Dynamic strategy, /// the point where we move from using the iterative strategy to the rtree. #[derive(Debug, Clone, Copy)] pub enum Strategy { From 7f5ab3cef57091e2fa1280e10875cdf9c3f950de Mon Sep 17 00:00:00 2001 From: Simon Detheridge Date: Fri, 3 May 2024 12:29:31 +0100 Subject: [PATCH 03/56] Use http path pattern instead of full path in metrics --- meilisearch/src/middleware.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/middleware.rs b/meilisearch/src/middleware.rs index 6707bb6d5..f3e665c29 100644 --- a/meilisearch/src/middleware.rs +++ b/meilisearch/src/middleware.rs @@ -59,10 +59,12 @@ where let request_path = req.path(); let is_registered_resource = req.resource_map().has_resource(request_path); if is_registered_resource { + let request_pattern = req.match_pattern(); + let metric_path = request_pattern.as_ref().map_or(request_path, String::as_str); let request_method = req.method().to_string(); histogram_timer = Some( crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS - .with_label_values(&[&request_method, request_path]) + .with_label_values(&[&request_method, metric_path]) .start_timer(), ); } From 3698aef66bdf3b1bca2ad3b7a3362712e9dbd6b7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 6 May 2024 11:36:37 +0200 Subject: [PATCH 04/56] fix warning --- meilisearch/src/routes/mod.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index 7cf886017..a2fceb764 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -376,12 +376,6 @@ async fn get_version( }) } -#[derive(Serialize)] -struct KeysResponse { - private: Option, - public: Option, -} - pub async fn get_health( req: HttpRequest, index_scheduler: Data, From f33a1282f8987dde2a7cde3d69297c564952b765 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 May 2024 10:31:39 +0200 Subject: [PATCH 05/56] Bump Rustls to v0.21.12 --- Cargo.lock | 160 ++++++++++++++++++++--------------------- meilisearch/Cargo.toml | 2 +- 2 files changed, 81 insertions(+), 81 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fad60e8da..2a8bdcbd3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -80,7 +80,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -424,7 +424,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -435,7 +435,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -552,7 +552,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -685,7 +685,7 @@ checksum = "4da9a32f3fed317401fa3c862968128267c3106685286e15d5aaa3d7389c2f60" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -845,9 +845,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.90" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" +checksum = "17f6e324229dc011159fcc089755d1e2e216a90d43a7dea6853ca740b84f35e7" dependencies = [ "jobserver", "libc", @@ -992,7 +992,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1304,7 +1304,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1326,7 +1326,7 @@ checksum = 
"836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ "darling_core 0.20.3", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1356,7 +1356,7 @@ checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1460,7 +1460,7 @@ dependencies = [ "convert_case 0.6.0", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1678,7 +1678,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1698,7 +1698,7 @@ checksum = "03cdc46ec28bd728e67540c528013c6a10eb69a02eb31078a1bda695438cbfb8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -1781,7 +1781,7 @@ dependencies = [ "darling 0.20.3", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "uuid", ] @@ -1913,7 +1913,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -2117,9 +2117,9 @@ checksum = "36d244a08113319b5ebcabad2b8b7925732d15eec46d7e7ac3c11734f3b7a6ad" [[package]] name = "getrandom" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", "js-sys", @@ -2413,7 +2413,7 @@ dependencies = [ "futures-util", "http 0.2.11", "hyper", - "rustls 0.21.10", + "rustls 0.21.12", "tokio", "tokio-rustls", ] @@ -3157,7 +3157,7 @@ checksum = "fc2fb41a9bb4257a3803154bdf7e2df7d45197d1941c9b1a90ad815231630721" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3213,9 +3213,9 @@ checksum = "e34f76eb3611940e0e7d53a9aaa4e6a3151f69541a282fd0dad5571420c53ff1" [[package]] name = "lock_api" -version = 
"0.4.10" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" dependencies = [ "autocfg", "scopeguard", @@ -3258,7 +3258,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3342,7 +3342,7 @@ dependencies = [ "rayon", "regex", "reqwest", - "rustls 0.21.10", + "rustls 0.21.12", "rustls-pemfile", "segment", "serde", @@ -3598,7 +3598,7 @@ checksum = "371717c0a5543d6a800cac822eac735aa7d2d2fbb41002e9856a4089532dbdce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3942,7 +3942,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -3996,7 +3996,7 @@ dependencies = [ "phf_shared", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -4025,7 +4025,7 @@ checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -4133,9 +4133,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" dependencies = [ "unicode-ident", ] @@ -4207,9 +4207,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -4391,7 +4391,7 @@ dependencies = [ "once_cell", "percent-encoding", 
"pin-project-lite", - "rustls 0.21.10", + "rustls 0.21.12", "rustls-pemfile", "serde", "serde_json", @@ -4505,9 +4505,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.10" +version = "0.21.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" dependencies = [ "log", "ring", @@ -4517,9 +4517,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.22.2" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" +checksum = "99008d7ad0bbbea527ec27bddbc0e432c5b87d8175178cee68d2eec9c4a1813c" dependencies = [ "log", "ring", @@ -4540,9 +4540,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.3.1" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ede67b28608b4c60685c7d54122d4400d90f62b40caee7700e700380a390fa8" +checksum = "ecd36cc4259e3e4514335c4a138c6b43171a8d61d8f5c9348f9fc7529416f247" [[package]] name = "rustls-webpki" @@ -4667,7 +4667,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -4941,7 +4941,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -4963,9 +4963,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.58" +version = "2.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3" dependencies = [ "proc-macro2", "quote", @@ -4989,7 +4989,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ 
"proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -5100,7 +5100,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -5243,7 +5243,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -5252,7 +5252,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.10", + "rustls 0.21.12", "tokio", ] @@ -5354,7 +5354,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -5537,7 +5537,7 @@ dependencies = [ "flate2", "log", "once_cell", - "rustls 0.22.2", + "rustls 0.22.3", "rustls-pki-types", "rustls-webpki 0.102.2", "serde", @@ -5703,7 +5703,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "wasm-bindgen-shared", ] @@ -5737,7 +5737,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -5834,7 +5834,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ "windows-core", - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -5843,7 +5843,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -5870,7 +5870,7 @@ version = "0.52.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.0", + "windows-targets 0.52.4", ] [[package]] @@ -5905,17 +5905,17 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm 0.52.0", - "windows_aarch64_msvc 0.52.0", - "windows_i686_gnu 0.52.0", - "windows_i686_msvc 0.52.0", - "windows_x86_64_gnu 0.52.0", - "windows_x86_64_gnullvm 0.52.0", - "windows_x86_64_msvc 0.52.0", + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -5932,9 +5932,9 @@ checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" @@ -5950,9 +5950,9 @@ checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_aarch64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" @@ -5968,9 +5968,9 @@ checksum = 
"622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" @@ -5986,9 +5986,9 @@ checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_i686_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" @@ -6004,9 +6004,9 @@ checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnu" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" @@ -6022,9 +6022,9 @@ checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" @@ -6040,9 +6040,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" [[package]] name = "windows_x86_64_msvc" -version = "0.52.0" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "winnow" @@ -6140,7 +6140,7 @@ checksum = "9e6936f0cce458098a201c245a11bef556c6a0181129c7034d10d76d1ec3a2b8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "synstructure", ] @@ -6161,7 +6161,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", ] [[package]] @@ -6181,7 +6181,7 @@ checksum = "e6a647510471d372f2e6c2e6b7219e44d8c574d24fdc11c610a61455782f18c3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.60", "synstructure", ] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 4a2b11b21..ed62c5f48 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -75,7 +75,7 @@ reqwest = { version = "0.11.23", features = [ "rustls-tls", "json", ], default-features = false } -rustls = "0.21.6" +rustls = "0.21.12" rustls-pemfile = "1.0.2" segment = { version = "0.2.3", optional = true } serde = { version = "1.0.195", features = ["derive"] } From ac4bc143c4dff49d6a5d2fb9730a7202c001b5c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 7 May 2024 10:39:38 +0200 Subject: [PATCH 06/56] Bump ureq to v2.9.7 --- Cargo.lock | 34 ++++++++++++++++++++-------------- index-scheduler/Cargo.toml | 2 +- milli/Cargo.toml | 2 +- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a8bdcbd3..937fce64a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -486,6 +486,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "base64ct" version = "1.6.0" @@ -4517,9 +4523,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.22.3" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99008d7ad0bbbea527ec27bddbc0e432c5b87d8175178cee68d2eec9c4a1813c" +checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ "log", "ring", @@ -4643,9 +4649,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.197" +version = "1.0.198" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc" dependencies = [ "serde_derive", ] @@ -4661,9 +4667,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.198" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9" dependencies = [ "proc-macro2", "quote", @@ -4672,9 +4678,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.115" +version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" dependencies = [ "indexmap", "itoa", @@ -5469,9 +5475,9 @@ dependencies = [ [[package]] name = "unicode-bidi" -version = "0.3.13" +version = "0.3.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92888ba5573ff080736b3648696b70cafad7d250551175acbaa4e0385b3e1460" +checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" 
[[package]] name = "unicode-blocks" @@ -5529,15 +5535,15 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "2.9.6" +version = "2.9.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11f214ce18d8b2cbe84ed3aa6486ed3f5b285cf8d8fbdbce9f3f767a724adc35" +checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "flate2", "log", "once_cell", - "rustls 0.22.3", + "rustls 0.22.4", "rustls-pki-types", "rustls-webpki 0.102.2", "serde", diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index c758f1114..4b6c0a36d 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -37,7 +37,7 @@ time = { version = "0.3.31", features = [ "macros", ] } tracing = "0.1.40" -ureq = "2.9.1" +ureq = "2.9.7" uuid = { version = "1.6.1", features = ["serde", "v4"] } [dev-dependencies] diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 082cd0812..7d903178b 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -85,7 +85,7 @@ liquid = "0.26.4" arroy = "0.2.0" rand = "0.8.5" tracing = "0.1.40" -ureq = { version = "2.9.6", features = ["json"] } +ureq = { version = "2.9.7", features = ["json"] } url = "2.5.0" [dev-dependencies] From 2a0ece814cc904a828bae325cd1977c6659bdc04 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 7 May 2024 12:23:36 +0200 Subject: [PATCH 07/56] Add precommands to workloads --- xtask/src/bench/workload.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/xtask/src/bench/workload.rs b/xtask/src/bench/workload.rs index d82c5ad19..db44b5a8f 100644 --- a/xtask/src/bench/workload.rs +++ b/xtask/src/bench/workload.rs @@ -22,6 +22,8 @@ pub struct Workload { pub run_count: u16, pub extra_cli_args: Vec, pub assets: BTreeMap, + #[serde(default)] + pub precommands: Vec, pub commands: Vec, } @@ -37,6 +39,15 @@ async fn run_commands( let report_folder = 
&args.report_folder; let workload_name = &workload.name; + for batch in workload + .precommands + .as_slice() + .split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait)) + { + super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder) + .await?; + } + std::fs::create_dir_all(report_folder) .with_context(|| format!("could not create report directory at {report_folder}"))?; From 43763eb98ac6e9e6630fb4b5f34c21f9539a6ac2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 7 May 2024 12:25:57 +0200 Subject: [PATCH 08/56] Document precommands --- BENCHMARKS.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/BENCHMARKS.md b/BENCHMARKS.md index e588b1b5b..e1d0c5feb 100644 --- a/BENCHMARKS.md +++ b/BENCHMARKS.md @@ -187,8 +187,8 @@ They are JSON files with the following structure (comments are not actually supp }, // Core of the workload. // A list of commands to run sequentially. - // A command is a request to the Meilisearch instance that is executed while the profiling runs. - "commands": [ + // Optional: A precommand is a request to the Meilisearch instance that is executed before the profiling runs. + "precommands": [ { // Meilisearch route to call. `http://localhost:7700/` will be prepended. "route": "indexes/movies/settings", @@ -224,8 +224,11 @@ They are JSON files with the following structure (comments are not actually supp // - DontWait: run the next command without waiting the response to this one. // - WaitForResponse: run the next command as soon as the response from the server is received. // - WaitForTask: run the next command once **all** the Meilisearch tasks created up to now have finished processing. - "synchronous": "DontWait" - }, + "synchronous": "WaitForTask" + } + ], + // A command is a request to the Meilisearch instance that is executed while the profiling runs. 
+ "commands": [ { "route": "indexes/movies/documents", "method": "POST", From 9d3ff11b21c896433f11a0ae9e2837095bbcabfe Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 7 May 2024 14:03:14 +0200 Subject: [PATCH 09/56] Modify existing workload files to use precommands --- workloads/hackernews.json | 8 +++--- workloads/movies-nothreads.json | 8 +++--- workloads/movies-subset-hf-embeddings.json | 6 +++-- workloads/settings-add-embeddings.json | 6 +++-- workloads/settings-add-remove-filters.json | 6 +++-- workloads/settings-proximity-precision.json | 6 +++-- .../settings-remove-add-swap-searchable.json | 6 +++-- workloads/settings-typo.json | 27 ++++++++++++++++--- 8 files changed, 53 insertions(+), 20 deletions(-) diff --git a/workloads/hackernews.json b/workloads/hackernews.json index 0a99b69ff..5762a7309 100644 --- a/workloads/hackernews.json +++ b/workloads/hackernews.json @@ -54,7 +54,7 @@ "sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe" } }, - "commands": [ + "precommands": [ { "route": "indexes/movies/settings", "method": "PATCH", @@ -78,8 +78,10 @@ ] } }, - "synchronous": "DontWait" - }, + "synchronous": "WaitForTask" + } + ], + "commands": [ { "route": "indexes/movies/documents", "method": "POST", diff --git a/workloads/movies-nothreads.json b/workloads/movies-nothreads.json index 175daacf9..d3dd006fa 100644 --- a/workloads/movies-nothreads.json +++ b/workloads/movies-nothreads.json @@ -11,7 +11,7 @@ "sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1" } }, - "commands": [ + "precommands": [ { "route": "indexes/movies/settings", "method": "PATCH", @@ -30,8 +30,10 @@ ] } }, - "synchronous": "DontWait" - }, + "synchronous": "WaitForTask" + } + ], + "commands": [ { "route": "indexes/movies/documents", "method": "POST", diff --git a/workloads/movies-subset-hf-embeddings.json b/workloads/movies-subset-hf-embeddings.json index d24bc752c..d7672cf73 100644 --- a/workloads/movies-subset-hf-embeddings.json +++ 
b/workloads/movies-subset-hf-embeddings.json @@ -11,7 +11,7 @@ "sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6" } }, - "commands": [ + "precommands": [ { "route": "experimental-features", "method": "PATCH", @@ -55,7 +55,9 @@ } }, "synchronous": "WaitForTask" - }, + } + ], + "commands": [ { "route": "indexes/movies/documents", "method": "POST", diff --git a/workloads/settings-add-embeddings.json b/workloads/settings-add-embeddings.json index f87286943..6ad50769a 100644 --- a/workloads/settings-add-embeddings.json +++ b/workloads/settings-add-embeddings.json @@ -11,7 +11,7 @@ "sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6" } }, - "commands": [ + "precommands": [ { "route": "experimental-features", "method": "PATCH", @@ -49,7 +49,9 @@ "asset": "movies-100.json" }, "synchronous": "WaitForTask" - }, + } + ], + "commands": [ { "route": "indexes/movies/settings", "method": "PATCH", diff --git a/workloads/settings-add-remove-filters.json b/workloads/settings-add-remove-filters.json index 12493a8fc..f017ed960 100644 --- a/workloads/settings-add-remove-filters.json +++ b/workloads/settings-add-remove-filters.json @@ -11,7 +11,7 @@ "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" } }, - "commands": [ + "precommands": [ { "route": "indexes/peoples/settings", "method": "PATCH", @@ -59,7 +59,9 @@ "asset": "150k-people.json" }, "synchronous": "WaitForTask" - }, + } + ], + "commands": [ { "route": "indexes/peoples/settings", "method": "PATCH", diff --git a/workloads/settings-proximity-precision.json b/workloads/settings-proximity-precision.json index 384f99e37..ac6d98da0 100644 --- a/workloads/settings-proximity-precision.json +++ b/workloads/settings-proximity-precision.json @@ -11,7 +11,7 @@ "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" } }, - "commands": [ + "precommands": [ { "route": "indexes/peoples/settings", "method": "PATCH", @@ -61,7 +61,9 @@ "asset": 
"150k-people.json" }, "synchronous": "WaitForTask" - }, + } + ], + "commands": [ { "route": "indexes/peoples/settings", "method": "PATCH", diff --git a/workloads/settings-remove-add-swap-searchable.json b/workloads/settings-remove-add-swap-searchable.json index 61db8822e..7f70d1ce8 100644 --- a/workloads/settings-remove-add-swap-searchable.json +++ b/workloads/settings-remove-add-swap-searchable.json @@ -11,7 +11,7 @@ "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" } }, - "commands": [ + "precommands": [ { "route": "indexes/peoples/settings", "method": "PATCH", @@ -61,7 +61,9 @@ "asset": "150k-people.json" }, "synchronous": "WaitForTask" - }, + } + ], + "commands": [ { "route": "indexes/peoples/settings", "method": "PATCH", diff --git a/workloads/settings-typo.json b/workloads/settings-typo.json index 45163bc98..e04135877 100644 --- a/workloads/settings-typo.json +++ b/workloads/settings-typo.json @@ -11,7 +11,7 @@ "sha256": "28c359a0956958af0ba204ec11bad3045a0864a10b4838914fea25a01724f84b" } }, - "commands": [ + "precommands": [ { "route": "indexes/peoples/settings", "method": "PATCH", @@ -62,14 +62,18 @@ "asset": "150k-people.json" }, "synchronous": "WaitForTask" - }, + } + ], + "commands": [ { "route": "indexes/peoples/settings", "method": "PATCH", "body": { "inline": { "typoTolerance": { - "disableOnAttributes": ["featured_job_organization_name"] + "disableOnAttributes": [ + "featured_job_organization_name" + ] } } }, @@ -93,7 +97,22 @@ "body": { "inline": { "typoTolerance": { - "disableOnWords": ["Ben","Elowitz","Kevin","Flaherty", "Ron", "Dustin", "Owen", "Chris", "Mark", "Matt", "Peter", "Van", "Head", "of"] + "disableOnWords": [ + "Ben", + "Elowitz", + "Kevin", + "Flaherty", + "Ron", + "Dustin", + "Owen", + "Chris", + "Mark", + "Matt", + "Peter", + "Van", + "Head", + "of" + ] } } }, From c22460045c8b0d5a830caeae5c3b1856ccfa7b90 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 6 May 2024 14:49:45 +0200 Subject: [PATCH 10/56] Stops 
returning an option in the internal searchable fields --- milli/src/fieldids_weights_map.rs | 28 +++++ milli/src/index.rs | 108 ++++++++++++------ milli/src/lib.rs | 3 + milli/src/search/new/db_cache.rs | 12 +- milli/src/search/new/exact_attribute.rs | 8 +- milli/src/search/new/mod.rs | 17 +-- .../search/new/ranking_rule_graph/fid/mod.rs | 12 +- .../extract/extract_docid_word_positions.rs | 6 +- milli/src/update/settings.rs | 16 +-- 9 files changed, 120 insertions(+), 90 deletions(-) create mode 100644 milli/src/fieldids_weights_map.rs diff --git a/milli/src/fieldids_weights_map.rs b/milli/src/fieldids_weights_map.rs new file mode 100644 index 000000000..255f6ab80 --- /dev/null +++ b/milli/src/fieldids_weights_map.rs @@ -0,0 +1,28 @@ +use std::collections::HashMap; + +use serde::{Deserialize, Serialize}; + +use crate::{FieldId, Weight}; + +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct FieldidsWeightsMap { + map: HashMap, +} + +impl FieldidsWeightsMap { + pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option { + self.map.insert(fid, weight) + } + + pub fn remove(&mut self, fid: FieldId) -> Option { + self.map.remove(&fid) + } + + pub fn weight(&self, fid: FieldId) -> Option { + self.map.get(&fid).copied() + } + + pub fn max_weight(&self) -> Option { + self.map.values().copied().max() + } +} diff --git a/milli/src/index.rs b/milli/src/index.rs index 27b273393..b6b07404b 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1,5 +1,6 @@ use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::convert::TryInto; use std::fs::File; use std::path::Path; @@ -25,8 +26,9 @@ use crate::proximity::ProximityPrecision; use crate::vector::EmbeddingConfig; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, - FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, - Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, 
U8StrStrCodec, BEU16, BEU32, BEU64, + FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, FieldidsWeightsMap, + GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, + BEU16, BEU32, BEU64, }; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; @@ -42,6 +44,7 @@ pub mod main_key { pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields"; pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution"; pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; + pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map"; pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids"; pub const GEO_RTREE_KEY: &str = "geo-rtree"; pub const PRIMARY_KEY_KEY: &str = "primary-key"; @@ -414,6 +417,32 @@ impl Index { .unwrap_or_default()) } + /* fieldids weights map */ + // This maps the fields ids to their weights. + // Their weights is defined by the ordering of the searchable attributes. + + /// Writes the fieldids weights map which associates the field ids to their weights + pub(crate) fn put_fieldids_weights_map( + &self, + wtxn: &mut RwTxn, + map: &FieldidsWeightsMap, + ) -> heed::Result<()> { + self.main.remap_types::>().put( + wtxn, + main_key::FIELDIDS_WEIGHTS_MAP_KEY, + map, + ) + } + + /// Get the fieldids weights map which associates the field ids to their weights + pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result { + Ok(self + .main + .remap_types::>() + .get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)? + .unwrap_or_default()) + } + /* geo rtree */ /// Writes the provided `rtree` which associates coordinates to documents ids. @@ -578,10 +607,12 @@ impl Index { wtxn: &mut RwTxn, user_fields: &[&str], fields_ids_map: &FieldsIdsMap, - ) -> heed::Result<()> { + ) -> Result<()> { // We can write the user defined searchable fields as-is. 
self.put_user_defined_searchable_fields(wtxn, user_fields)?; + let mut weights = self.fieldids_weights_map(&wtxn)?; + // Now we generate the real searchable fields: // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion. // 2. Iterate over the user defined searchable fields. @@ -589,17 +620,23 @@ impl Index { // (ie doggo.name is a subset of doggo) then we push it at the end of the fields. let mut real_fields = user_fields.to_vec(); - for field_from_map in fields_ids_map.names() { - for user_field in user_fields { + for (id, field_from_map) in fields_ids_map.iter() { + for (weight, user_field) in user_fields.iter().enumerate() { if crate::is_faceted_by(field_from_map, user_field) && !user_fields.contains(&field_from_map) { real_fields.push(field_from_map); + + let weight: u16 = + weight.try_into().map_err(|_| UserError::AttributeLimitReached)?; + weights.insert(id, weight as u16); } } } - self.put_searchable_fields(wtxn, &real_fields) + self.put_searchable_fields(wtxn, &real_fields)?; + self.put_fieldids_weights_map(wtxn, &weights)?; + Ok(()) } pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { @@ -623,28 +660,31 @@ impl Index { } /// Returns the searchable fields, those are the fields that are indexed, - /// if the searchable fields aren't there it means that **all** the fields are indexed. - pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { + pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result>> { self.main .remap_types::>>() - .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY) + .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)? + .map(|fields| Ok(fields.into_iter().map(|field| Cow::Borrowed(field)).collect())) + .unwrap_or_else(|| { + Ok(self + .fields_ids_map(rtxn)? + .names() + .map(|field| Cow::Owned(field.to_string())) + .collect()) + }) } /// Identical to `searchable_fields`, but returns the ids instead. 
- pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result>> { - match self.searchable_fields(rtxn)? { - Some(fields) => { - let fields_ids_map = self.fields_ids_map(rtxn)?; - let mut fields_ids = Vec::new(); - for name in fields { - if let Some(field_id) = fields_ids_map.id(name) { - fields_ids.push(field_id); - } - } - Ok(Some(fields_ids)) + pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result> { + let fields = self.searchable_fields(rtxn)?; + let fields_ids_map = self.fields_ids_map(rtxn)?; + let mut fields_ids = Vec::new(); + for name in fields { + if let Some(field_id) = fields_ids_map.id(&name) { + fields_ids.push(field_id); } - None => Ok(None), } + Ok(fields_ids) } /// Writes the searchable fields, when this list is specified, only these are indexed. @@ -1710,10 +1750,14 @@ pub(crate) mod tests { ])) .unwrap(); - db_snap!(index, field_distribution, 1); + db_snap!(index, field_distribution, @r###" + age 1 | + id 2 | + name 2 | + "###); db_snap!(index, word_docids, - @r###" + @r###" 1 [0, ] 2 [1, ] 20 [1, ] @@ -1722,18 +1766,6 @@ pub(crate) mod tests { "### ); - db_snap!(index, field_distribution); - - db_snap!(index, field_distribution, - @r###" - age 1 | - id 2 | - name 2 | - "### - ); - - // snapshot_index!(&index, "1", include: "^field_distribution$"); - // we add all the documents a second time. 
we are supposed to get the same // field_distribution in the end index @@ -1820,7 +1852,7 @@ pub(crate) mod tests { // ensure we get the right real searchable fields + user defined searchable fields let rtxn = index.read_txn().unwrap(); - let real = index.searchable_fields(&rtxn).unwrap().unwrap(); + let real = index.searchable_fields(&rtxn).unwrap(); assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]); let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap(); @@ -1840,7 +1872,7 @@ pub(crate) mod tests { // ensure we get the right real searchable fields + user defined searchable fields let rtxn = index.read_txn().unwrap(); - let real = index.searchable_fields(&rtxn).unwrap().unwrap(); + let real = index.searchable_fields(&rtxn).unwrap(); assert_eq!(real, &["doggo", "name"]); let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap(); assert_eq!(user_defined, &["doggo", "name"]); @@ -1856,7 +1888,7 @@ pub(crate) mod tests { // ensure we get the right real searchable fields + user defined searchable fields let rtxn = index.read_txn().unwrap(); - let real = index.searchable_fields(&rtxn).unwrap().unwrap(); + let real = index.searchable_fields(&rtxn).unwrap(); assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]); let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap(); diff --git a/milli/src/lib.rs b/milli/src/lib.rs index a1e240464..881633b5c 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -28,6 +28,7 @@ pub mod vector; #[cfg(test)] #[macro_use] pub mod snapshot_tests; +mod fieldids_weights_map; use std::collections::{BTreeMap, HashMap}; use std::convert::{TryFrom, TryInto}; @@ -52,6 +53,7 @@ pub use self::error::{ Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError, }; pub use self::external_documents_ids::ExternalDocumentsIds; +pub use self::fieldids_weights_map::FieldidsWeightsMap; pub use self::fields_ids_map::FieldsIdsMap; pub use 
self::heed_codec::{ BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, @@ -77,6 +79,7 @@ pub type FastMap4 = HashMap>; pub type FastMap8 = HashMap>; pub type FieldDistribution = BTreeMap; pub type FieldId = u16; +pub type Weight = u16; pub type Object = serde_json::Map; pub type Position = u32; pub type RelativePosition = u16; diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 62c921a1d..a99000f60 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -315,11 +315,7 @@ impl<'ctx> SearchContext<'ctx> { .map_err(heed::Error::Decoding)? } else { // Compute the distance at the attribute level and store it in the cache. - let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? { - fids - } else { - self.index.fields_ids_map(self.txn)?.ids().collect() - }; + let fids = self.index.searchable_fields_ids(self.txn)?; let mut docids = RoaringBitmap::new(); for fid in fids { // for each field, intersect left word bitmap and right word bitmap, @@ -408,11 +404,7 @@ impl<'ctx> SearchContext<'ctx> { let prefix_docids = match proximity_precision { ProximityPrecision::ByAttribute => { // Compute the distance at the attribute level and store it in the cache. - let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? { - fids - } else { - self.index.fields_ids_map(self.txn)?.ids().collect() - }; + let fids = self.index.searchable_fields_ids(self.txn)?; let mut prefix_docids = RoaringBitmap::new(); // for each field, intersect left word bitmap and right word bitmap, // then merge the result in a global bitmap before storing it in the cache. 
diff --git a/milli/src/search/new/exact_attribute.rs b/milli/src/search/new/exact_attribute.rs index 7932f0c2a..41b70ae39 100644 --- a/milli/src/search/new/exact_attribute.rs +++ b/milli/src/search/new/exact_attribute.rs @@ -184,13 +184,7 @@ impl State { return Ok(State::Empty(query_graph.clone())); } - let searchable_fields_ids = { - if let Some(fids) = ctx.index.searchable_fields_ids(ctx.txn)? { - fids - } else { - ctx.index.fields_ids_map(ctx.txn)?.ids().collect() - } - }; + let searchable_fields_ids = ctx.index.searchable_fields_ids(ctx.txn)?; let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len()); // then check that there exists at least one attribute that has all of the terms diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 617068ef8..acbb3638b 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -96,27 +96,22 @@ impl<'ctx> SearchContext<'ctx> { contains_wildcard = true; continue; } - let searchable_contains_name = - searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name)); + let searchable_contains_name = searchable_names.iter().any(|name| name == field_name); let fid = match (fids_map.id(field_name), searchable_contains_name) { // The Field id exist and the field is searchable - (Some(fid), Some(true)) | (Some(fid), None) => fid, + (Some(fid), true) => fid, // The field is searchable but the Field id doesn't exist => Internal Error - (None, Some(true)) => { + (None, true) => { return Err(FieldIdMapMissingEntry::FieldName { field_name: field_name.to_string(), process: "search", } .into()) } - // The field is not searchable, but the searchableAttributes are set to * => ignore field - (None, None) => continue, // The field is not searchable => User error - (_fid, Some(false)) => { - let (valid_fields, hidden_fields) = match searchable_names { - Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?, - None => self.index.remove_hidden_fields(self.txn, 
fids_map.names())?, - }; + (_fid, false) => { + let (valid_fields, hidden_fields) = + self.index.remove_hidden_fields(self.txn, searchable_names)?; let field = field_name.to_string(); return Err(UserError::InvalidSearchableAttribute { diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/milli/src/search/new/ranking_rule_graph/fid/mod.rs index 8f3e0cc82..cf65249de 100644 --- a/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -77,17 +77,7 @@ impl RankingRuleGraphTrait for FidGraph { } // always lookup the max_fid if we don't already and add an artificial condition for max scoring - let max_fid: Option = { - if let Some(max_fid) = ctx - .index - .searchable_fields_ids(ctx.txn)? - .map(|field_ids| field_ids.into_iter().max()) - { - max_fid - } else { - ctx.index.fields_ids_map(ctx.txn)?.ids().max() - } - }; + let max_fid: Option = ctx.index.searchable_fields_ids(ctx.txn)?.into_iter().max(); if let Some(max_fid) = max_fid { if !all_fields.contains(&max_fid) { diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 6af5bba6d..d97b6639e 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -186,7 +186,7 @@ fn searchable_fields_changed( ) -> bool { let searchable_fields = &settings_diff.new.searchable_fields_ids; for (field_id, field_bytes) in obkv.iter() { - if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) { + if searchable_fields.contains(&field_id) { let del_add = KvReaderDelAdd::new(field_bytes); match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) { // if both fields are None, check the next field. @@ -298,7 +298,7 @@ fn lang_safe_tokens_from_document<'a>( /// Extract words mapped with their positions of a document. 
fn tokens_from_document<'a>( obkv: &KvReader, - searchable_fields: &Option>, + searchable_fields: &[FieldId], tokenizer: &Tokenizer, max_positions_per_attributes: u32, del_add: DelAdd, @@ -309,7 +309,7 @@ fn tokens_from_document<'a>( let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer); for (field_id, field_bytes) in obkv.iter() { // if field is searchable. - if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) { + if searchable_fields.as_ref().contains(&field_id) { // extract deletion or addition only. if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) { // parse json. diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 1997e966e..c0742a74a 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -468,14 +468,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Setting::Set(ref fields) => { // Check to see if the searchable fields changed before doing anything else let old_fields = self.index.searchable_fields(self.wtxn)?; - let did_change = match old_fields { - // If old_fields is Some, let's check to see if the fields actually changed - Some(old_fields) => { - let new_fields = fields.iter().map(String::as_str).collect::>(); - new_fields != old_fields - } - // If old_fields is None, the fields have changed (because they are being set) - None => true, + let did_change = { + let new_fields = fields.iter().map(String::as_str).collect::>(); + new_fields != old_fields }; if !did_change { return Ok(false); @@ -1172,7 +1167,7 @@ pub(crate) struct InnerIndexSettings { pub user_defined_faceted_fields: HashSet, pub user_defined_searchable_fields: Option>, pub faceted_fields_ids: HashSet, - pub searchable_fields_ids: Option>, + pub searchable_fields_ids: Vec, pub exact_attributes: HashSet, pub proximity_precision: ProximityPrecision, pub embedding_configs: EmbeddingConfigs, @@ -1517,6 +1512,7 @@ mod tests { use big_s::S; use heed::types::Bytes; use maplit::{btreemap, btreeset, 
hashset}; + use meili_snap::snapshot; use super::*; use crate::error::Error; @@ -1576,7 +1572,7 @@ mod tests { // Check that the searchable field have been reset and documents are found now. let rtxn = index.read_txn().unwrap(); let searchable_fields = index.searchable_fields(&rtxn).unwrap(); - assert_eq!(searchable_fields, None); + snapshot!(format!("{searchable_fields:?}"), @r###"["name", "id", "age"]"###); let result = index.search(&rtxn).query("23").execute().unwrap(); assert_eq!(result.documents_ids.len(), 1); let documents = index.documents(&rtxn, result.documents_ids).unwrap(); From 4e4a1ddff7807c1268579bda54c53cd6fca29547 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 7 May 2024 16:37:34 +0200 Subject: [PATCH 11/56] gate a test behind the required feature --- milli/src/update/index_documents/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index bb180a7ee..936ce1efc 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -3260,6 +3260,7 @@ mod tests { } #[test] + #[cfg(feature = "all-tokenizations")] fn stored_detected_script_and_language_should_not_return_deleted_documents() { use charabia::{Language, Script}; let index = TempIndex::new(); From 685f452fb2524c9c3f67218fb2dd273d59ba5110 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 7 May 2024 17:56:40 +0200 Subject: [PATCH 12/56] Fix the indexing of the searchable --- milli/examples/search.rs | 2 +- milli/src/fieldids_weights_map.rs | 4 + milli/src/index.rs | 85 ++++++++++- milli/src/search/mod.rs | 4 +- milli/src/search/new/db_cache.rs | 140 ++++++------------ .../src/search/new/matches/matching_words.rs | 2 +- milli/src/search/new/matches/mod.rs | 2 +- milli/src/search/new/mod.rs | 83 ++++++----- .../src/search/new/query_term/parse_query.rs | 2 +- milli/src/search/new/tests/attribute_fid.rs | 15 +- milli/src/snapshot_tests.rs | 25 ++++ milli/src/update/settings.rs | 25 
++-- 12 files changed, 235 insertions(+), 154 deletions(-) diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 8640acf42..3d10ec599 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -48,7 +48,7 @@ fn main() -> Result<(), Box> { let start = Instant::now(); - let mut ctx = SearchContext::new(&index, &txn); + let mut ctx = SearchContext::new(&index, &txn)?; let universe = filtered_universe(&ctx, &None)?; let docs = execute_search( diff --git a/milli/src/fieldids_weights_map.rs b/milli/src/fieldids_weights_map.rs index 255f6ab80..bead160e9 100644 --- a/milli/src/fieldids_weights_map.rs +++ b/milli/src/fieldids_weights_map.rs @@ -25,4 +25,8 @@ impl FieldidsWeightsMap { pub fn max_weight(&self) -> Option { self.map.values().copied().max() } + + pub fn ids<'a>(&'a self) -> impl Iterator + 'a { + self.map.keys().copied() + } } diff --git a/milli/src/index.rs b/milli/src/index.rs index b6b07404b..e9f0f75de 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -28,7 +28,7 @@ use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, - BEU16, BEU32, BEU64, + Weight, BEU16, BEU32, BEU64, }; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; @@ -443,6 +443,27 @@ impl Index { .unwrap_or_default()) } + pub fn searchable_fields_and_weights<'a>( + &self, + rtxn: &'a RoTxn, + ) -> heed::Result, FieldId, Weight)>> { + let fid_map = self.fields_ids_map(rtxn)?; + let weight_map = self.fieldids_weights_map(rtxn)?; + let searchable = self.searchable_fields(rtxn)?; + + Ok(searchable + .into_iter() + .map(|field| { + // the searchable attributes are a subset of the field id map + let fid = fid_map.id(&field).unwrap(); + // all the searchable fields have a weight + let weight = weight_map.weight(fid).unwrap(); + + (field, 
fid, weight) + }) + .collect()) + } + /* geo rtree */ /// Writes the provided `rtree` which associates coordinates to documents ids. @@ -605,9 +626,25 @@ impl Index { pub(crate) fn put_all_searchable_fields_from_fields_ids_map( &self, wtxn: &mut RwTxn, - user_fields: &[&str], + user_fields: Option<&[&str]>, fields_ids_map: &FieldsIdsMap, ) -> Result<()> { + // Special case if there is no user defined fields. + // Then the whole field id map is marked as searchable. + if user_fields.is_none() { + let mut weights = self.fieldids_weights_map(&wtxn)?; + let mut searchable = Vec::new(); + for (weight, (fid, name)) in fields_ids_map.iter().enumerate() { + searchable.push(name); + weights.insert(fid, weight as u16); + } + self.put_searchable_fields(wtxn, &searchable)?; + self.put_fieldids_weights_map(wtxn, &weights)?; + return Ok(()); + } + + let user_fields = user_fields.unwrap(); + // We can write the user defined searchable fields as-is. self.put_user_defined_searchable_fields(wtxn, user_fields)?; @@ -617,13 +654,13 @@ impl Index { // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion. // 2. Iterate over the user defined searchable fields. // 3. If a user defined field is a subset of a field defined in the fields_ids_map - // (ie doggo.name is a subset of doggo) then we push it at the end of the fields. - let mut real_fields = user_fields.to_vec(); + // (ie doggo.name is a subset of doggo) right after doggo and with the same weight. 
+ let mut real_fields = Vec::new(); for (id, field_from_map) in fields_ids_map.iter() { for (weight, user_field) in user_fields.iter().enumerate() { if crate::is_faceted_by(field_from_map, user_field) - && !user_fields.contains(&field_from_map) + && !real_fields.contains(&field_from_map) { real_fields.push(field_from_map); @@ -2427,6 +2464,14 @@ pub(crate) mod tests { 11 0 4 1 "###); + db_snap!(index, fields_ids_map, @r###" + 0 primary_key | + "###); + db_snap!(index, searchable_fields, @r###"["primary_key"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + "###); index .add_documents(documents!([ @@ -2442,6 +2487,16 @@ pub(crate) mod tests { 11 0 4 1 "###); + db_snap!(index, fields_ids_map, @r###" + 0 primary_key | + 1 a | + "###); + db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + 1 1 | + "###); index.delete_documents(Default::default()); @@ -2452,6 +2507,16 @@ pub(crate) mod tests { 11 0 4 1 "###); + db_snap!(index, fields_ids_map, @r###" + 0 primary_key | + 1 a | + "###); + db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + 1 1 | + "###); index .add_documents(documents!([ @@ -2467,6 +2532,16 @@ pub(crate) mod tests { 11 0 4 1 "###); + db_snap!(index, fields_ids_map, @r###" + 0 primary_key | + 1 a | + "###); + db_snap!(index, searchable_fields, @r###"["primary_key", "a"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + 1 1 | + "###); let rtxn = index.read_txn().unwrap(); let search = Search::new(&rtxn, &index); diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index bab67e6bd..7427db3a1 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -147,7 +147,7 @@ impl<'a> Search<'a> { pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result { if has_vector_search { - let ctx = 
SearchContext::new(self.index, self.rtxn); + let ctx = SearchContext::new(self.index, self.rtxn)?; filtered_universe(&ctx, &self.filter) } else { Ok(self.execute()?.candidates) @@ -155,7 +155,7 @@ impl<'a> Search<'a> { } pub fn execute(&self) -> Result { - let mut ctx = SearchContext::new(self.index, self.rtxn); + let mut ctx = SearchContext::new(self.index, self.rtxn)?; if let Some(searchable_attributes) = self.searchable_attributes { ctx.searchable_attributes(searchable_attributes)?; diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index a99000f60..4985f55e9 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -159,58 +159,36 @@ impl<'ctx> SearchContext<'ctx> { /// Retrieve or insert the given value in the `word_docids` database. fn get_db_word_docids(&mut self, word: Interned) -> Result> { - match &self.restricted_fids { - Some(restricted_fids) => { - let interned = self.word_interner.get(word).as_str(); - let keys: Vec<_> = - restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect(); + let interned = self.word_interner.get(word).as_str(); + let keys: Vec<_> = + self.searchable_fids.tolerant.iter().map(|(fid, _weight)| (interned, *fid)).collect(); - DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( - self.txn, - word, - &keys[..], - &mut self.db_cache.word_docids, - self.index.word_fid_docids.remap_data_type::(), - merge_cbo_roaring_bitmaps, - ) - } - None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( - self.txn, - word, - self.word_interner.get(word).as_str(), - &mut self.db_cache.word_docids, - self.index.word_docids.remap_data_type::(), - ), - } + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + &keys[..], + &mut self.db_cache.word_docids, + self.index.word_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) } fn get_db_exact_word_docids( &mut self, word: Interned, ) -> Result> { - match 
&self.restricted_fids { - Some(restricted_fids) => { - let interned = self.word_interner.get(word).as_str(); - let keys: Vec<_> = - restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect(); + let interned = self.word_interner.get(word).as_str(); + let keys: Vec<_> = + self.searchable_fids.exact.iter().map(|(fid, _weight)| (interned, *fid)).collect(); - DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( - self.txn, - word, - &keys[..], - &mut self.db_cache.exact_word_docids, - self.index.word_fid_docids.remap_data_type::(), - merge_cbo_roaring_bitmaps, - ) - } - None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( - self.txn, - word, - self.word_interner.get(word).as_str(), - &mut self.db_cache.exact_word_docids, - self.index.exact_word_docids.remap_data_type::(), - ), - } + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + &keys[..], + &mut self.db_cache.exact_word_docids, + self.index.word_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) } pub fn word_prefix_docids(&mut self, prefix: Word) -> Result> { @@ -238,58 +216,36 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - match &self.restricted_fids { - Some(restricted_fids) => { - let interned = self.word_interner.get(prefix).as_str(); - let keys: Vec<_> = - restricted_fids.tolerant.iter().map(|fid| (interned, *fid)).collect(); + let interned = self.word_interner.get(prefix).as_str(); + let keys: Vec<_> = + self.searchable_fids.tolerant.iter().map(|(fid, _weight)| (interned, *fid)).collect(); - DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( - self.txn, - prefix, - &keys[..], - &mut self.db_cache.word_prefix_docids, - self.index.word_prefix_fid_docids.remap_data_type::(), - merge_cbo_roaring_bitmaps, - ) - } - None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( - self.txn, - prefix, - self.word_interner.get(prefix).as_str(), - &mut 
self.db_cache.word_prefix_docids, - self.index.word_prefix_docids.remap_data_type::(), - ), - } + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + &keys[..], + &mut self.db_cache.word_prefix_docids, + self.index.word_prefix_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) } fn get_db_exact_word_prefix_docids( &mut self, prefix: Interned, ) -> Result> { - match &self.restricted_fids { - Some(restricted_fids) => { - let interned = self.word_interner.get(prefix).as_str(); - let keys: Vec<_> = - restricted_fids.exact.iter().map(|fid| (interned, *fid)).collect(); + let interned = self.word_interner.get(prefix).as_str(); + let keys: Vec<_> = + self.searchable_fids.exact.iter().map(|(fid, _weight)| (interned, *fid)).collect(); - DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( - self.txn, - prefix, - &keys[..], - &mut self.db_cache.exact_word_prefix_docids, - self.index.word_prefix_fid_docids.remap_data_type::(), - merge_cbo_roaring_bitmaps, - ) - } - None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( - self.txn, - prefix, - self.word_interner.get(prefix).as_str(), - &mut self.db_cache.exact_word_prefix_docids, - self.index.exact_word_prefix_docids.remap_data_type::(), - ), - } + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + &keys[..], + &mut self.db_cache.exact_word_prefix_docids, + self.index.word_prefix_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) } pub fn get_db_word_pair_proximity_docids( @@ -465,8 +421,8 @@ impl<'ctx> SearchContext<'ctx> { word: Interned, fid: u16, ) -> Result> { - // if the requested fid isn't in the restricted list, return None. - if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { + // if the requested fid isn't in the list of searchable, return None. 
+ if !self.searchable_fids.contains(&fid) { return Ok(None); } @@ -484,8 +440,8 @@ impl<'ctx> SearchContext<'ctx> { word_prefix: Interned, fid: u16, ) -> Result> { - // if the requested fid isn't in the restricted list, return None. - if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { + // if the requested fid isn't in the searchable list, return None. + if !self.searchable_fids.contains(&fid) { return Ok(None); } diff --git a/milli/src/search/new/matches/matching_words.rs b/milli/src/search/new/matches/matching_words.rs index 56bf6c169..4db1c99c6 100644 --- a/milli/src/search/new/matches/matching_words.rs +++ b/milli/src/search/new/matches/matching_words.rs @@ -258,7 +258,7 @@ pub(crate) mod tests { fn matching_words() { let temp_index = temp_index_with_documents(); let rtxn = temp_index.read_txn().unwrap(); - let mut ctx = SearchContext::new(&temp_index, &rtxn); + let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap(); let mut builder = TokenizerBuilder::default(); let tokenizer = builder.build(); let tokens = tokenizer.tokenize("split this world"); diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 8f0069589..40e6f8dc8 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -506,7 +506,7 @@ mod tests { impl<'a> MatcherBuilder<'a> { fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self { - let mut ctx = SearchContext::new(index, rtxn); + let mut ctx = SearchContext::new(index, rtxn).unwrap(); let universe = filtered_universe(&ctx, &None).unwrap(); let crate::search::PartialSearchResult { located_query_terms, .. 
} = execute_search( &mut ctx, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index acbb3638b..90d971fa3 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -49,13 +49,12 @@ pub use self::geo_sort::Strategy as GeoSortStrategy; use self::graph_based_ranking_rule::Words; use self::interner::Interned; use self::vector_sort::VectorSort; -use crate::error::FieldIdMapMissingEntry; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; use crate::vector::Embedder; use crate::{ AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget, - UserError, + UserError, Weight, }; /// A structure used throughout the execution of a search query. @@ -67,12 +66,25 @@ pub struct SearchContext<'ctx> { pub phrase_interner: DedupInterner, pub term_interner: Interner, pub phrase_docids: PhraseDocIdsCache, - pub restricted_fids: Option, + pub searchable_fids: SearchableFids, } impl<'ctx> SearchContext<'ctx> { - pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Self { - Self { + pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Result { + let searchable_fids = index.searchable_fields_and_weights(txn)?; + let exact_attributes_ids = index.exact_attributes_ids(txn)?; + + let mut exact = Vec::new(); + let mut tolerant = Vec::new(); + for (name, fid, weight) in searchable_fids { + if exact_attributes_ids.contains(&fid) { + exact.push((fid, weight)); + } else { + tolerant.push((fid, weight)); + } + } + + Ok(Self { index, txn, db_cache: <_>::default(), @@ -80,38 +92,32 @@ impl<'ctx> SearchContext<'ctx> { phrase_interner: <_>::default(), term_interner: <_>::default(), phrase_docids: <_>::default(), - restricted_fids: None, - } + searchable_fids: SearchableFids { tolerant, exact }, + }) } - pub fn searchable_attributes(&mut self, searchable_attributes: &'ctx [String]) -> Result<()> { + // TODO: TAMO continue here + pub fn 
searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> { + if attributes_to_search_on.contains(&String::from("*")) { + return Ok(()); + } + let fids_map = self.index.fields_ids_map(self.txn)?; - let searchable_names = self.index.searchable_fields(self.txn)?; + let searchable_names = self.index.searchable_fields_and_weights(self.txn)?; let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?; - let mut restricted_fids = RestrictedFids::default(); - let mut contains_wildcard = false; - for field_name in searchable_attributes { - if field_name == "*" { - contains_wildcard = true; - continue; - } - let searchable_contains_name = searchable_names.iter().any(|name| name == field_name); - let fid = match (fids_map.id(field_name), searchable_contains_name) { + let mut restricted_fids = SearchableFids::default(); + for field_name in attributes_to_search_on { + let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name); + let (fid, weight) = match searchable_weight { // The Field id exist and the field is searchable - (Some(fid), true) => fid, - // The field is searchable but the Field id doesn't exist => Internal Error - (None, true) => { - return Err(FieldIdMapMissingEntry::FieldName { - field_name: field_name.to_string(), - process: "search", - } - .into()) - } + Some((_name, fid, weight)) => (*fid, *weight), // The field is not searchable => User error - (_fid, false) => { - let (valid_fields, hidden_fields) = - self.index.remove_hidden_fields(self.txn, searchable_names)?; + None => { + let (valid_fields, hidden_fields) = self.index.remove_hidden_fields( + self.txn, + searchable_names.iter().map(|(name, _, _)| name), + )?; let field = field_name.to_string(); return Err(UserError::InvalidSearchableAttribute { @@ -124,13 +130,13 @@ impl<'ctx> SearchContext<'ctx> { }; if exact_attributes_ids.contains(&fid) { - restricted_fids.exact.push(fid); + restricted_fids.exact.push((fid, weight)); } else { - 
restricted_fids.tolerant.push(fid); + restricted_fids.tolerant.push((fid, weight)); }; } - self.restricted_fids = (!contains_wildcard).then_some(restricted_fids); + self.searchable_fids = restricted_fids; Ok(()) } @@ -152,14 +158,15 @@ impl Word { } #[derive(Debug, Clone, Default)] -pub struct RestrictedFids { - pub tolerant: Vec, - pub exact: Vec, +pub struct SearchableFids { + pub tolerant: Vec<(FieldId, Weight)>, + pub exact: Vec<(FieldId, Weight)>, } -impl RestrictedFids { +impl SearchableFids { pub fn contains(&self, fid: &FieldId) -> bool { - self.tolerant.contains(fid) || self.exact.contains(fid) + self.tolerant.iter().find(|(id, _)| id == fid).is_some() + || self.exact.iter().find(|(id, _)| id == fid).is_some() } } diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index 93f5f081c..74b2ed564 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -366,7 +366,7 @@ mod tests { let tokens = tokenizer.tokenize("."); let index = temp_index_with_documents(); let rtxn = index.read_txn()?; - let mut ctx = SearchContext::new(&index, &rtxn); + let mut ctx = SearchContext::new(&index, &rtxn)?; // panics with `attempt to add with overflow` before let ExtractedTokens { query_terms, .. 
} = located_query_terms_from_tokens(&mut ctx, tokens, None)?; diff --git a/milli/src/search/new/tests/attribute_fid.rs b/milli/src/search/new/tests/attribute_fid.rs index 38225404c..61b0a743b 100644 --- a/milli/src/search/new/tests/attribute_fid.rs +++ b/milli/src/search/new/tests/attribute_fid.rs @@ -1,5 +1,5 @@ use crate::index::tests::TempIndex; -use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy}; +use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy}; fn create_index() -> TempIndex { let index = TempIndex::new(); @@ -131,6 +131,19 @@ fn test_attribute_fid_simple() { #[test] fn test_attribute_fid_ngrams() { let index = create_index(); + db_snap!(index, fields_ids_map, @r###" + 0 title | + 1 description | + 2 plot | + 3 id | + "###); + db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + 1 1 | + 2 2 | + "###); let txn = index.read_txn().unwrap(); diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index 28c4cb45c..d79003747 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -308,6 +308,25 @@ pub fn snap_fields_ids_map(index: &Index) -> String { } snap } +pub fn snap_fieldids_weights_map(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let weights_map = index.fieldids_weights_map(&rtxn).unwrap(); + + let mut snap = String::new(); + writeln!(&mut snap, "fid weight").unwrap(); + let mut field_ids: Vec<_> = weights_map.ids().collect(); + field_ids.sort(); + for field_id in field_ids { + let weight = weights_map.weight(field_id).unwrap(); + writeln!(&mut snap, "{field_id:<3} {weight:<3} |").unwrap(); + } + snap +} +pub fn snap_searchable_fields(index: &Index) -> String { + let rtxn = index.read_txn().unwrap(); + let searchable_fields = index.searchable_fields(&rtxn).unwrap(); + format!("{searchable_fields:?}") +} pub fn snap_geo_faceted_documents_ids(index: &Index) -> 
String { let rtxn = index.read_txn().unwrap(); let geo_faceted_documents_ids = index.geo_faceted_documents_ids(&rtxn).unwrap(); @@ -469,6 +488,12 @@ macro_rules! full_snap_of_db { ($index:ident, fields_ids_map) => {{ $crate::snapshot_tests::snap_fields_ids_map(&$index) }}; + ($index:ident, fieldids_weights_map) => {{ + $crate::snapshot_tests::snap_fieldids_weights_map(&$index) + }}; + ($index:ident, searchable_fields) => {{ + $crate::snapshot_tests::snap_searchable_fields(&$index) + }}; ($index:ident, geo_faceted_documents_ids) => {{ $crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index) }}; diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index c0742a74a..19b2c5778 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -496,7 +496,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.index.put_all_searchable_fields_from_fields_ids_map( self.wtxn, - &names, + Some(&names), &new_fields_ids_map, )?; self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?; @@ -1228,18 +1228,19 @@ impl InnerIndexSettings { // find and insert the new field ids pub fn recompute_searchables(&mut self, wtxn: &mut heed::RwTxn, index: &Index) -> Result<()> { + let searchable_fields = self + .user_defined_searchable_fields + .as_ref() + .map(|searchable| searchable.iter().map(|s| s.as_str()).collect::>()); + // in case new fields were introduced we're going to recreate the searchable fields. 
- if let Some(searchable_fields) = self.user_defined_searchable_fields.as_ref() { - let searchable_fields = - searchable_fields.iter().map(String::as_ref).collect::>(); - index.put_all_searchable_fields_from_fields_ids_map( - wtxn, - &searchable_fields, - &self.fields_ids_map, - )?; - let searchable_fields_ids = index.searchable_fields_ids(wtxn)?; - self.searchable_fields_ids = searchable_fields_ids; - } + index.put_all_searchable_fields_from_fields_ids_map( + wtxn, + searchable_fields.as_deref(), + &self.fields_ids_map, + )?; + let searchable_fields_ids = index.searchable_fields_ids(wtxn)?; + self.searchable_fields_ids = searchable_fields_ids; Ok(()) } From 9ecde418531e199a9c558b4273e63e58649ff35d Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 13 May 2024 16:18:05 +0200 Subject: [PATCH 13/56] add a test on the current behaviour --- milli/src/index.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/milli/src/index.rs b/milli/src/index.rs index e9f0f75de..c66222ab1 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2627,4 +2627,52 @@ pub(crate) mod tests { db_snap!(index, geo_faceted_documents_ids); // ensure that no documents were inserted } + + #[test] + fn swapping_searchable_attributes() { + // See https://github.com/meilisearch/meilisearch/issues/4484 + + let index = TempIndex::new(); + + index + .update_settings(|settings| { + settings.set_searchable_fields(vec![S("name")]); + settings.set_filterable_fields(HashSet::from([S("age")])); + }) + .unwrap(); + + index + .add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" })) + .unwrap(); + db_snap!(index, fields_ids_map, @r###" + 0 name | + 1 id | + 2 age | + 3 realName | + "###); + db_snap!(index, searchable_fields, @r###"["name"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + "###); + + index + .update_settings(|settings| { + settings.set_searchable_fields(vec![S("name"), S("realName")]); + 
settings.set_filterable_fields(HashSet::from([S("age")])); + }) + .unwrap(); + db_snap!(index, fields_ids_map, @r###" + 0 name | + 1 realName | + 2 id | + 3 age | + "###); + db_snap!(index, searchable_fields, @r###"["name", "realName"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + 1 1 | + "###); + } } From b0afe0972e109bdaaa532ef5f125e02f83930ab0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 13 May 2024 16:49:08 +0200 Subject: [PATCH 14/56] stop updating the fields ids map when fields are only swapped --- milli/src/index.rs | 9 +++++---- milli/src/update/settings.rs | 21 ++++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index c66222ab1..d0d148d86 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2662,17 +2662,18 @@ pub(crate) mod tests { settings.set_filterable_fields(HashSet::from([S("age")])); }) .unwrap(); + // The order of the field id map shouldn't change db_snap!(index, fields_ids_map, @r###" 0 name | - 1 realName | - 2 id | - 3 age | + 1 id | + 2 age | + 3 realName | "###); db_snap!(index, searchable_fields, @r###"["name", "realName"]"###); db_snap!(index, fieldids_weights_map, @r###" fid weight 0 0 | - 1 1 | + 3 1 | "###); } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 19b2c5778..6875e6f47 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -12,6 +12,7 @@ use time::OffsetDateTime; use super::index_documents::{IndexDocumentsConfig, Transform}; use super::IndexerConfig; use crate::criterion::Criterion; +use crate::documents::FieldIdMapper; use crate::error::UserError; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::order_by_map::OrderByMap; @@ -461,8 +462,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Ok(true) } - /// Updates the index's searchable attributes. 
This causes the field map to be recomputed to - /// reflect the order of the searchable attributes. + /// Updates the index's searchable attributes. fn update_searchable(&mut self) -> Result { match self.searchable_fields { Setting::Set(ref fields) => { @@ -480,17 +480,20 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { // ids for any settings that uses the facets. (distinct_fields, filterable_fields). let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; - let mut new_fields_ids_map = FieldsIdsMap::new(); - // fields are deduplicated, only the first occurrence is taken into account - let names = fields.iter().unique().map(String::as_str).collect::>(); + // Since we're updating the settings we can only add new fields at the end of the field id map + let mut new_fields_ids_map = old_fields_ids_map.clone(); + let names = fields + .iter() + // fields are deduplicated, only the first occurrence is taken into account + .unique() + .map(String::as_str) + .collect::>(); // Add all the searchable attributes to the field map, and then add the // remaining fields from the old field map to the new one for name in names.iter() { - new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; - } - - for (_, name) in old_fields_ids_map.iter() { + // The fields ids map won't change the field id of already present elements thus only the + // new fields will be inserted. 
new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; } From a0082c4df9f3cc5497678d4d6989dbba8674f31c Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 14 May 2024 10:45:06 +0200 Subject: [PATCH 15/56] add a failing test on the attribute ranking rule --- milli/src/index.rs | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/milli/src/index.rs b/milli/src/index.rs index d0d148d86..accfff719 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2662,6 +2662,7 @@ pub(crate) mod tests { settings.set_filterable_fields(HashSet::from([S("age")])); }) .unwrap(); + // The order of the field id map shouldn't change db_snap!(index, fields_ids_map, @r###" 0 name | @@ -2676,4 +2677,54 @@ pub(crate) mod tests { 3 1 | "###); } + + #[test] + fn attribute_weights_after_swapping_searchable_attributes() { + // See https://github.com/meilisearch/meilisearch/issues/4484 + + let index = TempIndex::new(); + + index + .update_settings(|settings| { + settings.set_searchable_fields(vec![S("name"), S("beverage")]); + }) + .unwrap(); + + index + .add_documents(documents!([ + { "id": 0, "name": "kefir", "beverage": "water" }, + { "id": 1, "name": "tamo", "beverage": "kefir" } + ])) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + let mut search = index.search(&rtxn); + let results = search.query("kefir").execute().unwrap(); + + // We should find kefir the dog first + insta::assert_debug_snapshot!(results.documents_ids, @r###" + [ + 0, + 1, + ] + "###); + + index + .update_settings(|settings| { + settings.set_searchable_fields(vec![S("beverage"), S("name")]); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + let mut search = index.search(&rtxn); + let results = search.query("kefir").execute().unwrap(); + + // We should find tamo first + insta::assert_debug_snapshot!(results.documents_ids, @r###" + [ + 0, + 1, + ] + "###); + } } From caa6a7149ac6967580c6e17a4a62f06f64f8312a Mon Sep 17 00:00:00 2001 From: 
Tamo Date: Tue, 14 May 2024 16:56:08 +0200 Subject: [PATCH 16/56] make the attribute ranking rule use the weights and fix the tests --- meilisearch/src/search_queue.rs | 3 + milli/src/index.rs | 2 +- milli/src/search/new/mod.rs | 29 ++- .../search/new/ranking_rule_graph/fid/mod.rs | 38 +-- milli/src/search/new/tests/attribute_fid.rs | 14 +- ...attribute_fid__attribute_fid_ngrams-4.snap | 244 ++++++++++++++++++ milli/src/update/settings.rs | 12 +- 7 files changed, 306 insertions(+), 36 deletions(-) create mode 100644 milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap diff --git a/meilisearch/src/search_queue.rs b/meilisearch/src/search_queue.rs index 6d5044d20..0fe9a5a53 100644 --- a/meilisearch/src/search_queue.rs +++ b/meilisearch/src/search_queue.rs @@ -85,6 +85,9 @@ impl SearchQueue { }, search_request = receive_new_searches.recv() => { + if search_request.is_none() { + continue; + } // this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web let search_request = search_request.unwrap(); if searches_running < usize::from(parallelism) && queue.is_empty() { diff --git a/milli/src/index.rs b/milli/src/index.rs index accfff719..49f78f3cd 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2722,8 +2722,8 @@ pub(crate) mod tests { // We should find tamo first insta::assert_debug_snapshot!(results.documents_ids, @r###" [ - 0, 1, + 0, ] "###); } diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 90d971fa3..9a2ff5b02 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -76,7 +76,7 @@ impl<'ctx> SearchContext<'ctx> { let mut exact = Vec::new(); let mut tolerant = Vec::new(); - for (name, fid, weight) in searchable_fids { + for (_name, fid, weight) in searchable_fids { if exact_attributes_ids.contains(&fid) { exact.push((fid, weight)); } else { @@ -96,22 +96,26 @@ impl<'ctx> SearchContext<'ctx> { }) } - // TODO: TAMO 
continue here pub fn searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> { - if attributes_to_search_on.contains(&String::from("*")) { - return Ok(()); - } - - let fids_map = self.index.fields_ids_map(self.txn)?; + let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?; let searchable_names = self.index.searchable_fields_and_weights(self.txn)?; let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?; + let mut wildcard = false; + let mut restricted_fids = SearchableFids::default(); for field_name in attributes_to_search_on { + if field_name == "*" { + wildcard = true; + // we cannot early exit as we want to returns error in case of unknown fields + continue; + } let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name); let (fid, weight) = match searchable_weight { // The Field id exist and the field is searchable Some((_name, fid, weight)) => (*fid, *weight), + // The field is not searchable but the user didn't define any searchable attributes + None if user_defined_searchable.is_none() => continue, // The field is not searchable => User error None => { let (valid_fields, hidden_fields) = self.index.remove_hidden_fields( @@ -136,7 +140,16 @@ impl<'ctx> SearchContext<'ctx> { }; } - self.searchable_fids = restricted_fids; + if wildcard { + let (exact, tolerant) = searchable_names + .iter() + .map(|(_name, fid, weight)| (*fid, *weight)) + .partition(|(fid, _weight)| exact_attributes_ids.contains(fid)); + + self.searchable_fids = SearchableFids { tolerant, exact }; + } else { + self.searchable_fids = restricted_fids; + } Ok(()) } diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/milli/src/search/new/ranking_rule_graph/fid/mod.rs index cf65249de..e10f2fbab 100644 --- a/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -7,12 +7,12 @@ use crate::search::new::interner::{DedupInterner, 
Interned}; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id; use crate::search::new::SearchContext; -use crate::Result; +use crate::{FieldId, Result}; #[derive(Clone, PartialEq, Eq, Hash)] pub struct FidCondition { term: LocatedQueryTermSubset, - fid: u16, + fid: Option, } pub enum FidGraph {} @@ -26,13 +26,16 @@ impl RankingRuleGraphTrait for FidGraph { universe: &RoaringBitmap, ) -> Result { let FidCondition { term, .. } = condition; - // maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument - let mut docids = compute_query_term_subset_docids_within_field_id( - ctx, - &term.term_subset, - condition.fid, - )?; - docids &= universe; + + let docids = if let Some(fid) = condition.fid { + // maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument + let mut docids = + compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?; + docids &= universe; + docids + } else { + RoaringBitmap::new() + }; Ok(ComputedCondition { docids, @@ -68,24 +71,27 @@ impl RankingRuleGraphTrait for FidGraph { all_fields.extend(fields); } + let weights_map = ctx.index.fieldids_weights_map(ctx.txn)?; + let mut edges = vec![]; for fid in all_fields.iter().copied() { + let weight = weights_map.weight(fid).unwrap(); edges.push(( - fid as u32 * term.term_ids.len() as u32, - conditions_interner.insert(FidCondition { term: term.clone(), fid }), + weight as u32 * term.term_ids.len() as u32, + conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }), )); } // always lookup the max_fid if we don't already and add an artificial condition for max scoring - let max_fid: Option = ctx.index.searchable_fields_ids(ctx.txn)?.into_iter().max(); + let max_weight: Option = weights_map.max_weight(); - if let Some(max_fid) = max_fid { - if !all_fields.contains(&max_fid) { + if let Some(max_weight) 
= max_weight { + if !all_fields.contains(&max_weight) { edges.push(( - max_fid as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. + max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. conditions_interner.insert(FidCondition { term: term.clone(), // TODO remove this ugly clone - fid: max_fid, + fid: None, }), )); } diff --git a/milli/src/search/new/tests/attribute_fid.rs b/milli/src/search/new/tests/attribute_fid.rs index 61b0a743b..c595887ba 100644 --- a/milli/src/search/new/tests/attribute_fid.rs +++ b/milli/src/search/new/tests/attribute_fid.rs @@ -132,17 +132,17 @@ fn test_attribute_fid_simple() { fn test_attribute_fid_ngrams() { let index = create_index(); db_snap!(index, fields_ids_map, @r###" - 0 title | - 1 description | - 2 plot | - 3 id | + 0 id | + 1 title | + 2 description | + 3 plot | "###); db_snap!(index, searchable_fields, @r###"["title", "description", "plot"]"###); db_snap!(index, fieldids_weights_map, @r###" fid weight - 0 0 | - 1 1 | - 2 2 | + 1 0 | + 2 1 | + 3 2 | "###); let txn = index.read_txn().unwrap(); diff --git a/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap new file mode 100644 index 000000000..930a21626 --- /dev/null +++ b/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_fid__attribute_fid_ngrams-4.snap @@ -0,0 +1,244 @@ +--- +source: milli/src/search/new/tests/attribute_fid.rs +expression: "format!(\"{document_ids_scores:#?}\")" +--- +[ + ( + 2, + [ + Fid( + Rank { + rank: 19, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 91, + max_rank: 91, + }, + ), + ], + ), + ( + 6, + [ + Fid( + Rank { + rank: 15, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 5, + [ + Fid( + Rank { + rank: 14, + max_rank: 19, + }, + ), + 
Position( + Rank { + rank: 79, + max_rank: 91, + }, + ), + ], + ), + ( + 4, + [ + Fid( + Rank { + rank: 13, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 77, + max_rank: 91, + }, + ), + ], + ), + ( + 3, + [ + Fid( + Rank { + rank: 12, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 83, + max_rank: 91, + }, + ), + ], + ), + ( + 9, + [ + Fid( + Rank { + rank: 11, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 75, + max_rank: 91, + }, + ), + ], + ), + ( + 8, + [ + Fid( + Rank { + rank: 10, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 79, + max_rank: 91, + }, + ), + ], + ), + ( + 7, + [ + Fid( + Rank { + rank: 10, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 73, + max_rank: 91, + }, + ), + ], + ), + ( + 11, + [ + Fid( + Rank { + rank: 7, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 77, + max_rank: 91, + }, + ), + ], + ), + ( + 10, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 13, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 81, + max_rank: 91, + }, + ), + ], + ), + ( + 12, + [ + Fid( + Rank { + rank: 6, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 78, + max_rank: 91, + }, + ), + ], + ), + ( + 14, + [ + Fid( + Rank { + rank: 5, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 75, + max_rank: 91, + }, + ), + ], + ), + ( + 0, + [ + Fid( + Rank { + rank: 1, + max_rank: 19, + }, + ), + Position( + Rank { + rank: 91, + max_rank: 91, + }, + ), + ], + ), +] diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 6875e6f47..2e8ac157c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -12,7 +12,6 @@ use time::OffsetDateTime; use super::index_documents::{IndexDocumentsConfig, Transform}; use super::IndexerConfig; use crate::criterion::Criterion; -use crate::documents::FieldIdMapper; use crate::error::UserError; use 
crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::order_by_map::OrderByMap; @@ -1562,8 +1561,9 @@ mod tests { // we must find the appropriate document. let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap(); let documents = index.documents(&rtxn, result.documents_ids).unwrap(); + let fid_map = index.fields_ids_map(&rtxn).unwrap(); assert_eq!(documents.len(), 1); - assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..])); + assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); drop(rtxn); // We change the searchable fields to be the "name" field only. @@ -1575,12 +1575,16 @@ mod tests { // Check that the searchable field have been reset and documents are found now. let rtxn = index.read_txn().unwrap(); + let fid_map = index.fields_ids_map(&rtxn).unwrap(); + let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap(); + snapshot!(format!("{user_defined_searchable_fields:?}"), @"None"); + // the searchable fields should contain all the fields let searchable_fields = index.searchable_fields(&rtxn).unwrap(); - snapshot!(format!("{searchable_fields:?}"), @r###"["name", "id", "age"]"###); + snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###); let result = index.search(&rtxn).query("23").execute().unwrap(); assert_eq!(result.documents_ids.len(), 1); let documents = index.documents(&rtxn, result.documents_ids).unwrap(); - assert_eq!(documents[0].1.get(0), Some(&br#""kevin""#[..])); + assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); } #[test] From 9fffb8e83dd13cbe2d88655a258e5391b648d01e Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 14 May 2024 17:20:57 +0200 Subject: [PATCH 17/56] make clippy happy --- index-scheduler/src/utils.rs | 4 ++-- meilisearch/src/search.rs | 2 +- milli/src/fieldids_weights_map.rs | 2 +- milli/src/index.rs | 8 ++++---- milli/src/search/new/bucket_sort.rs | 4 ++-- 
milli/src/search/new/mod.rs | 3 +-- 6 files changed, 11 insertions(+), 12 deletions(-) diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs index 9f6f90db2..260ff6ee4 100644 --- a/index-scheduler/src/utils.rs +++ b/index-scheduler/src/utils.rs @@ -272,9 +272,9 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) { } for index_uid in index_uids { if index_uid == swap.0 { - *index_uid = swap.1.to_owned(); + swap.1.clone_into(index_uid); } else if index_uid == swap.1 { - *index_uid = swap.0.to_owned(); + swap.0.clone_into(index_uid); } } } diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index a383434a2..34ebe463d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -730,7 +730,7 @@ pub fn perform_search( let mut ids = BTreeSet::new(); for attr in attrs { if attr == "*" { - ids = displayed_ids.clone(); + ids.clone_from(&displayed_ids); break; } diff --git a/milli/src/fieldids_weights_map.rs b/milli/src/fieldids_weights_map.rs index bead160e9..fdfe8fba2 100644 --- a/milli/src/fieldids_weights_map.rs +++ b/milli/src/fieldids_weights_map.rs @@ -26,7 +26,7 @@ impl FieldidsWeightsMap { self.map.values().copied().max() } - pub fn ids<'a>(&'a self) -> impl Iterator + 'a { + pub fn ids(&self) -> impl Iterator + '_ { self.map.keys().copied() } } diff --git a/milli/src/index.rs b/milli/src/index.rs index 49f78f3cd..7fe9da0ff 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -632,7 +632,7 @@ impl Index { // Special case if there is no user defined fields. // Then the whole field id map is marked as searchable. if user_fields.is_none() { - let mut weights = self.fieldids_weights_map(&wtxn)?; + let mut weights = self.fieldids_weights_map(wtxn)?; let mut searchable = Vec::new(); for (weight, (fid, name)) in fields_ids_map.iter().enumerate() { searchable.push(name); @@ -648,7 +648,7 @@ impl Index { // We can write the user defined searchable fields as-is. 
self.put_user_defined_searchable_fields(wtxn, user_fields)?; - let mut weights = self.fieldids_weights_map(&wtxn)?; + let mut weights = self.fieldids_weights_map(wtxn)?; // Now we generate the real searchable fields: // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion. @@ -666,7 +666,7 @@ impl Index { let weight: u16 = weight.try_into().map_err(|_| UserError::AttributeLimitReached)?; - weights.insert(id, weight as u16); + weights.insert(id, weight); } } } @@ -701,7 +701,7 @@ impl Index { self.main .remap_types::>>() .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)? - .map(|fields| Ok(fields.into_iter().map(|field| Cow::Borrowed(field)).collect())) + .map(|fields| Ok(fields.into_iter().map(Cow::Borrowed).collect())) .unwrap_or_else(|| { Ok(self .fields_ids_map(rtxn)? diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index 521fcb983..e9bc5449d 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -101,7 +101,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( let mut ranking_rule_universes: Vec = vec![RoaringBitmap::default(); ranking_rules_len]; - ranking_rule_universes[0] = universe.clone(); + ranking_rule_universes[0].clone_from(universe); let mut cur_ranking_rule_index = 0; /// Finish iterating over the current ranking rule, yielding @@ -232,7 +232,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( } cur_ranking_rule_index += 1; - ranking_rule_universes[cur_ranking_rule_index] = next_bucket.candidates.clone(); + ranking_rule_universes[cur_ranking_rule_index].clone_from(&next_bucket.candidates); logger.start_iteration_ranking_rule( cur_ranking_rule_index, ranking_rules[cur_ranking_rule_index].as_ref(), diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 9a2ff5b02..b7514cbb5 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -178,8 +178,7 @@ pub struct SearchableFids { impl 
SearchableFids { pub fn contains(&self, fid: &FieldId) -> bool { - self.tolerant.iter().find(|(id, _)| id == fid).is_some() - || self.exact.iter().find(|(id, _)| id == fid).is_some() + self.tolerant.iter().any(|(id, _)| id == fid) || self.exact.iter().any(|(id, _)| id == fid) } } From 7ec4e2a3fbb89821f3a153a9adee05e405183720 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 15 May 2024 15:02:26 +0200 Subject: [PATCH 18/56] apply all style review comments --- meilisearch/src/search_queue.rs | 12 +++++---- milli/src/error.rs | 2 ++ milli/src/fieldids_weights_map.rs | 9 +++++++ milli/src/index.rs | 27 ++++++++++--------- .../search/new/ranking_rule_graph/fid/mod.rs | 11 ++++---- milli/src/update/settings.rs | 20 +++++--------- 6 files changed, 45 insertions(+), 36 deletions(-) diff --git a/meilisearch/src/search_queue.rs b/meilisearch/src/search_queue.rs index 0fe9a5a53..415da0c15 100644 --- a/meilisearch/src/search_queue.rs +++ b/meilisearch/src/search_queue.rs @@ -85,11 +85,13 @@ impl SearchQueue { }, search_request = receive_new_searches.recv() => { - if search_request.is_none() { - continue; - } - // this unwrap is safe because we're sure the `SearchQueue` still lives somewhere in actix-web - let search_request = search_request.unwrap(); + let search_request = match search_request { + Some(search_request) => search_request, + // This should never happen while actix-web is running, but it's not a reason to crash + // and it can generate a lot of noise in the tests. 
+ None => continue, + }; + if searches_running < usize::from(parallelism) && queue.is_empty() { searches_running += 1; // if the search requests die it's not a hard error on our side diff --git a/milli/src/error.rs b/milli/src/error.rs index e4550de1f..009781fcf 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -32,6 +32,8 @@ pub enum InternalError { DatabaseClosing, #[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))] DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> }, + #[error("Missing {key} in the fieldids weights mapping.")] + FieldidsWeightsMapMissingEntry { key: FieldId }, #[error(transparent)] FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry), #[error("Missing {key} in the field id mapping.")] diff --git a/milli/src/fieldids_weights_map.rs b/milli/src/fieldids_weights_map.rs index fdfe8fba2..72720a02a 100644 --- a/milli/src/fieldids_weights_map.rs +++ b/milli/src/fieldids_weights_map.rs @@ -1,3 +1,5 @@ +//! The fieldids weights map is in charge of storing linking the searchable fields with their weights. + use std::collections::HashMap; use serde::{Deserialize, Serialize}; @@ -10,22 +12,29 @@ pub struct FieldidsWeightsMap { } impl FieldidsWeightsMap { + /// Insert a field id -> weigth into the map. + /// If the map did not have this key present, `None` is returned. + /// If the map did have this key present, the value is updated, and the old value is returned. pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option { self.map.insert(fid, weight) } + /// Removes a field id from the map, returning the associated weight previously in the map. pub fn remove(&mut self, fid: FieldId) -> Option { self.map.remove(&fid) } + /// Returns weight corresponding to the key. pub fn weight(&self, fid: FieldId) -> Option { self.map.get(&fid).copied() } + /// Returns highest weight contained in the map if any. 
pub fn max_weight(&self) -> Option { self.map.values().copied().max() } + /// Return an iterator visiting all field ids in arbitrary order. pub fn ids(&self) -> impl Iterator + '_ { self.map.keys().copied() } diff --git a/milli/src/index.rs b/milli/src/index.rs index 7fe9da0ff..c565cdd5b 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -26,9 +26,9 @@ use crate::proximity::ProximityPrecision; use crate::vector::EmbeddingConfig; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, - FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, FieldidsWeightsMap, - GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, - Weight, BEU16, BEU32, BEU64, + FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, + FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, + Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64, }; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; @@ -446,22 +446,25 @@ impl Index { pub fn searchable_fields_and_weights<'a>( &self, rtxn: &'a RoTxn, - ) -> heed::Result, FieldId, Weight)>> { + ) -> Result, FieldId, Weight)>> { let fid_map = self.fields_ids_map(rtxn)?; let weight_map = self.fieldids_weights_map(rtxn)?; let searchable = self.searchable_fields(rtxn)?; - Ok(searchable + searchable .into_iter() - .map(|field| { - // the searchable attributes are a subset of the field id map - let fid = fid_map.id(&field).unwrap(); - // all the searchable fields have a weight - let weight = weight_map.weight(fid).unwrap(); + .map(|field| -> Result<_> { + let fid = fid_map.id(&field).ok_or_else(|| FieldIdMapMissingEntry::FieldName { + field_name: field.to_string(), + process: "searchable_fields_and_weights", + })?; + let weight = weight_map + .weight(fid) + .ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?; - (field, fid, weight) + Ok((field, fid, weight)) }) - 
.collect()) + .collect() } /* geo rtree */ diff --git a/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/milli/src/search/new/ranking_rule_graph/fid/mod.rs index e10f2fbab..a4a08ea46 100644 --- a/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -7,7 +7,7 @@ use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids_within_field_id; use crate::search::new::SearchContext; -use crate::{FieldId, Result}; +use crate::{FieldId, InternalError, Result}; #[derive(Clone, PartialEq, Eq, Hash)] pub struct FidCondition { @@ -29,10 +29,9 @@ impl RankingRuleGraphTrait for FidGraph { let docids = if let Some(fid) = condition.fid { // maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument - let mut docids = + let docids = compute_query_term_subset_docids_within_field_id(ctx, &term.term_subset, fid)?; - docids &= universe; - docids + docids & universe } else { RoaringBitmap::new() }; @@ -75,7 +74,9 @@ impl RankingRuleGraphTrait for FidGraph { let mut edges = vec![]; for fid in all_fields.iter().copied() { - let weight = weights_map.weight(fid).unwrap(); + let weight = weights_map + .weight(fid) + .ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?; edges.push(( weight as u32 * term.term_ids.len() as u32, conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }), diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 2e8ac157c..c66148813 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -475,33 +475,25 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { return Ok(false); } - // every time the searchable attributes are updated, we need to update the - // ids for any settings that uses the facets. (distinct_fields, filterable_fields). 
- let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; - // Since we're updating the settings we can only add new fields at the end of the field id map - let mut new_fields_ids_map = old_fields_ids_map.clone(); - let names = fields - .iter() - // fields are deduplicated, only the first occurrence is taken into account - .unique() - .map(String::as_str) - .collect::>(); + let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; + // fields are deduplicated, only the first occurrence is taken into account + let names = fields.iter().unique().map(String::as_str).collect::>(); // Add all the searchable attributes to the field map, and then add the // remaining fields from the old field map to the new one for name in names.iter() { // The fields ids map won't change the field id of already present elements thus only the // new fields will be inserted. - new_fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; + fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; } self.index.put_all_searchable_fields_from_fields_ids_map( self.wtxn, Some(&names), - &new_fields_ids_map, + &fields_ids_map, )?; - self.index.put_fields_ids_map(self.wtxn, &new_fields_ids_map)?; + self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; Ok(true) } Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?), From ad4d8502b3583f734f7508dea2e14656a8dea946 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 15 May 2024 17:16:10 +0200 Subject: [PATCH 19/56] stops storing the whole fieldids weights map when no searchable are defined --- milli/src/fieldids_weights_map.rs | 9 ++++++- milli/src/index.rs | 36 ++++++++++++---------------- milli/src/update/settings.rs | 40 +++++++++++++++++++++++++------ 3 files changed, 56 insertions(+), 29 deletions(-) diff --git a/milli/src/fieldids_weights_map.rs b/milli/src/fieldids_weights_map.rs index 72720a02a..5ca2a6146 100644 --- a/milli/src/fieldids_weights_map.rs +++ 
b/milli/src/fieldids_weights_map.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; use serde::{Deserialize, Serialize}; -use crate::{FieldId, Weight}; +use crate::{FieldId, FieldsIdsMap, Weight}; #[derive(Debug, Default, Serialize, Deserialize)] pub struct FieldidsWeightsMap { @@ -19,6 +19,13 @@ impl FieldidsWeightsMap { self.map.insert(fid, weight) } + /// Create the map from the fields ids maps. + /// Should only be called in the case there are NO searchable attributes. + /// The weights and the fields ids will have the same values. + pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self { + FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, fid)).collect() } + } + /// Removes a field id from the map, returning the associated weight previously in the map. pub fn remove(&mut self, fid: FieldId) -> Option { self.map.remove(&fid) diff --git a/milli/src/index.rs b/milli/src/index.rs index c565cdd5b..36f0b339e 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -436,11 +436,20 @@ impl Index { /// Get the fieldids weights map which associates the field ids to their weights pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result { - Ok(self - .main + self.main .remap_types::>() .get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)? - .unwrap_or_default()) + .map(Ok) + .unwrap_or_else(|| { + Ok(FieldidsWeightsMap::from_field_id_map_without_searchable( + &self.fields_ids_map(rtxn)?, + )) + }) + } + + /// Delete the fieldsids weights map + pub fn delete_fieldids_weights_map(&self, wtxn: &mut RwTxn) -> heed::Result { + self.main.remap_key_type::().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY) } pub fn searchable_fields_and_weights<'a>( @@ -629,29 +638,13 @@ impl Index { pub(crate) fn put_all_searchable_fields_from_fields_ids_map( &self, wtxn: &mut RwTxn, - user_fields: Option<&[&str]>, + user_fields: &[&str], fields_ids_map: &FieldsIdsMap, ) -> Result<()> { - // Special case if there is no user defined fields. 
- // Then the whole field id map is marked as searchable. - if user_fields.is_none() { - let mut weights = self.fieldids_weights_map(wtxn)?; - let mut searchable = Vec::new(); - for (weight, (fid, name)) in fields_ids_map.iter().enumerate() { - searchable.push(name); - weights.insert(fid, weight as u16); - } - self.put_searchable_fields(wtxn, &searchable)?; - self.put_fieldids_weights_map(wtxn, &weights)?; - return Ok(()); - } - - let user_fields = user_fields.unwrap(); - // We can write the user defined searchable fields as-is. self.put_user_defined_searchable_fields(wtxn, user_fields)?; - let mut weights = self.fieldids_weights_map(wtxn)?; + let mut weights = FieldidsWeightsMap::default(); // Now we generate the real searchable fields: // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion. @@ -682,6 +675,7 @@ impl Index { pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result { let did_delete_searchable = self.delete_searchable_fields(wtxn)?; let did_delete_user_defined = self.delete_user_defined_searchable_fields(wtxn)?; + self.delete_fieldids_weights_map(wtxn)?; Ok(did_delete_searchable || did_delete_user_defined) } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index c66148813..046644dc4 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -490,7 +490,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.index.put_all_searchable_fields_from_fields_ids_map( self.wtxn, - Some(&names), + &names, &fields_ids_map, )?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; @@ -1228,11 +1228,13 @@ impl InnerIndexSettings { .map(|searchable| searchable.iter().map(|s| s.as_str()).collect::>()); // in case new fields were introduced we're going to recreate the searchable fields. 
- index.put_all_searchable_fields_from_fields_ids_map( - wtxn, - searchable_fields.as_deref(), - &self.fields_ids_map, - )?; + if let Some(searchable_fields) = searchable_fields { + index.put_all_searchable_fields_from_fields_ids_map( + wtxn, + &searchable_fields, + &self.fields_ids_map, + )?; + } let searchable_fields_ids = index.searchable_fields_ids(wtxn)?; self.searchable_fields_ids = searchable_fields_ids; @@ -1513,7 +1515,7 @@ mod tests { use crate::error::Error; use crate::index::tests::TempIndex; use crate::update::ClearDocuments; - use crate::{Criterion, Filter, SearchResult}; + use crate::{db_snap, Criterion, Filter, SearchResult}; #[test] fn set_and_reset_searchable_fields() { @@ -1542,6 +1544,17 @@ mod tests { wtxn.commit().unwrap(); + db_snap!(index, fields_ids_map, @r###" + 0 id | + 1 name | + 2 age | + "###); + db_snap!(index, searchable_fields, @r###"["name"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 1 0 | + "###); + // Check that the searchable field is correctly set to "name" only. let rtxn = index.read_txn().unwrap(); // When we search for something that is not in @@ -1565,6 +1578,19 @@ mod tests { }) .unwrap(); + db_snap!(index, fields_ids_map, @r###" + 0 id | + 1 name | + 2 age | + "###); + db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + 1 1 | + 2 2 | + "###); + // Check that the searchable field have been reset and documents are found now. 
let rtxn = index.read_txn().unwrap(); let fid_map = index.fields_ids_map(&rtxn).unwrap(); From 5542f1d9f11c2c6d7ad691af11ed1b5177a13168 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 15 May 2024 18:00:39 +0200 Subject: [PATCH 20/56] get back to what we were doingb efore in the DB cache and with the restricted field id --- milli/src/search/new/db_cache.rs | 140 ++++++++++++++++++++----------- milli/src/search/new/mod.rs | 19 ++--- 2 files changed, 99 insertions(+), 60 deletions(-) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 4985f55e9..4fa0765e0 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -159,36 +159,58 @@ impl<'ctx> SearchContext<'ctx> { /// Retrieve or insert the given value in the `word_docids` database. fn get_db_word_docids(&mut self, word: Interned) -> Result> { - let interned = self.word_interner.get(word).as_str(); - let keys: Vec<_> = - self.searchable_fids.tolerant.iter().map(|(fid, _weight)| (interned, *fid)).collect(); + match &self.restricted_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(word).as_str(); + let keys: Vec<_> = + restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect(); - DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( - self.txn, - word, - &keys[..], - &mut self.db_cache.word_docids, - self.index.word_fid_docids.remap_data_type::(), - merge_cbo_roaring_bitmaps, - ) + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + &keys[..], + &mut self.db_cache.word_docids, + self.index.word_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + self.word_interner.get(word).as_str(), + &mut self.db_cache.word_docids, + self.index.word_docids.remap_data_type::(), + ), + } } fn get_db_exact_word_docids( &mut self, word: Interned, ) -> Result> { - let interned = 
self.word_interner.get(word).as_str(); - let keys: Vec<_> = - self.searchable_fids.exact.iter().map(|(fid, _weight)| (interned, *fid)).collect(); + match &self.restricted_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(word).as_str(); + let keys: Vec<_> = + restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect(); - DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( - self.txn, - word, - &keys[..], - &mut self.db_cache.exact_word_docids, - self.index.word_fid_docids.remap_data_type::(), - merge_cbo_roaring_bitmaps, - ) + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + &keys[..], + &mut self.db_cache.exact_word_docids, + self.index.word_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( + self.txn, + word, + self.word_interner.get(word).as_str(), + &mut self.db_cache.exact_word_docids, + self.index.exact_word_docids.remap_data_type::(), + ), + } } pub fn word_prefix_docids(&mut self, prefix: Word) -> Result> { @@ -216,36 +238,58 @@ impl<'ctx> SearchContext<'ctx> { &mut self, prefix: Interned, ) -> Result> { - let interned = self.word_interner.get(prefix).as_str(); - let keys: Vec<_> = - self.searchable_fids.tolerant.iter().map(|(fid, _weight)| (interned, *fid)).collect(); + match &self.restricted_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(prefix).as_str(); + let keys: Vec<_> = + restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect(); - DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( - self.txn, - prefix, - &keys[..], - &mut self.db_cache.word_prefix_docids, - self.index.word_prefix_fid_docids.remap_data_type::(), - merge_cbo_roaring_bitmaps, - ) + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + &keys[..], + &mut self.db_cache.word_prefix_docids, + 
self.index.word_prefix_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + self.word_interner.get(prefix).as_str(), + &mut self.db_cache.word_prefix_docids, + self.index.word_prefix_docids.remap_data_type::(), + ), + } } fn get_db_exact_word_prefix_docids( &mut self, prefix: Interned, ) -> Result> { - let interned = self.word_interner.get(prefix).as_str(); - let keys: Vec<_> = - self.searchable_fids.exact.iter().map(|(fid, _weight)| (interned, *fid)).collect(); + match &self.restricted_fids { + Some(restricted_fids) => { + let interned = self.word_interner.get(prefix).as_str(); + let keys: Vec<_> = + restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect(); - DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( - self.txn, - prefix, - &keys[..], - &mut self.db_cache.exact_word_prefix_docids, - self.index.word_prefix_fid_docids.remap_data_type::(), - merge_cbo_roaring_bitmaps, - ) + DatabaseCache::get_value_from_keys::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + &keys[..], + &mut self.db_cache.exact_word_prefix_docids, + self.index.word_prefix_fid_docids.remap_data_type::(), + merge_cbo_roaring_bitmaps, + ) + } + None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>( + self.txn, + prefix, + self.word_interner.get(prefix).as_str(), + &mut self.db_cache.exact_word_prefix_docids, + self.index.exact_word_prefix_docids.remap_data_type::(), + ), + } } pub fn get_db_word_pair_proximity_docids( @@ -421,8 +465,8 @@ impl<'ctx> SearchContext<'ctx> { word: Interned, fid: u16, ) -> Result> { - // if the requested fid isn't in the list of searchable, return None. - if !self.searchable_fids.contains(&fid) { + // if the requested fid isn't in the restricted list, return None. 
+ if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { return Ok(None); } @@ -440,8 +484,8 @@ impl<'ctx> SearchContext<'ctx> { word_prefix: Interned, fid: u16, ) -> Result> { - // if the requested fid isn't in the searchable list, return None. - if !self.searchable_fids.contains(&fid) { + // if the requested fid isn't in the restricted list, return None. + if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) { return Ok(None); } diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index b7514cbb5..2cea96fce 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -66,7 +66,7 @@ pub struct SearchContext<'ctx> { pub phrase_interner: DedupInterner, pub term_interner: Interner, pub phrase_docids: PhraseDocIdsCache, - pub searchable_fids: SearchableFids, + pub restricted_fids: Option, } impl<'ctx> SearchContext<'ctx> { @@ -92,7 +92,7 @@ impl<'ctx> SearchContext<'ctx> { phrase_interner: <_>::default(), term_interner: <_>::default(), phrase_docids: <_>::default(), - searchable_fids: SearchableFids { tolerant, exact }, + restricted_fids: None, }) } @@ -103,7 +103,7 @@ impl<'ctx> SearchContext<'ctx> { let mut wildcard = false; - let mut restricted_fids = SearchableFids::default(); + let mut restricted_fids = RestrictedFids::default(); for field_name in attributes_to_search_on { if field_name == "*" { wildcard = true; @@ -141,14 +141,9 @@ impl<'ctx> SearchContext<'ctx> { } if wildcard { - let (exact, tolerant) = searchable_names - .iter() - .map(|(_name, fid, weight)| (*fid, *weight)) - .partition(|(fid, _weight)| exact_attributes_ids.contains(fid)); - - self.searchable_fids = SearchableFids { tolerant, exact }; + self.restricted_fids = None; } else { - self.searchable_fids = restricted_fids; + self.restricted_fids = Some(restricted_fids); } Ok(()) @@ -171,12 +166,12 @@ impl Word { } #[derive(Debug, Clone, Default)] -pub struct SearchableFids { +pub struct RestrictedFids { pub tolerant: 
Vec<(FieldId, Weight)>, pub exact: Vec<(FieldId, Weight)>, } -impl SearchableFids { +impl RestrictedFids { pub fn contains(&self, fid: &FieldId) -> bool { self.tolerant.iter().any(|(id, _)| id == fid) || self.exact.iter().any(|(id, _)| id == fid) } From c78a2fa4f5dfdf9dc487d32ce7df6a52a2b02c64 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 15 May 2024 18:04:42 +0200 Subject: [PATCH 21/56] rename method and variable around the attributes to search on feature --- milli/src/search/mod.rs | 2 +- milli/src/search/new/mod.rs | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 7427db3a1..ca0eda49e 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -158,7 +158,7 @@ impl<'a> Search<'a> { let mut ctx = SearchContext::new(self.index, self.rtxn)?; if let Some(searchable_attributes) = self.searchable_attributes { - ctx.searchable_attributes(searchable_attributes)?; + ctx.attributes_to_search_on(searchable_attributes)?; } let universe = filtered_universe(&ctx, &self.filter)?; diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 2cea96fce..5e4c2f829 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -96,9 +96,12 @@ impl<'ctx> SearchContext<'ctx> { }) } - pub fn searchable_attributes(&mut self, attributes_to_search_on: &'ctx [String]) -> Result<()> { + pub fn attributes_to_search_on( + &mut self, + attributes_to_search_on: &'ctx [String], + ) -> Result<()> { let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?; - let searchable_names = self.index.searchable_fields_and_weights(self.txn)?; + let searchable_fields_weights = self.index.searchable_fields_and_weights(self.txn)?; let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?; let mut wildcard = false; @@ -110,7 +113,8 @@ impl<'ctx> SearchContext<'ctx> { // we cannot early exit as we want to returns error in case of unknown fields 
continue; } - let searchable_weight = searchable_names.iter().find(|(name, _, _)| name == field_name); + let searchable_weight = + searchable_fields_weights.iter().find(|(name, _, _)| name == field_name); let (fid, weight) = match searchable_weight { // The Field id exist and the field is searchable Some((_name, fid, weight)) => (*fid, *weight), @@ -120,7 +124,7 @@ impl<'ctx> SearchContext<'ctx> { None => { let (valid_fields, hidden_fields) = self.index.remove_hidden_fields( self.txn, - searchable_names.iter().map(|(name, _, _)| name), + searchable_fields_weights.iter().map(|(name, _, _)| name), )?; let field = field_name.to_string(); From f2d0a59f1da3a83875e57a38fb5c45e0af993b3f Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 16 May 2024 01:06:33 +0200 Subject: [PATCH 22/56] when no searchable attributes are defined, makes all the weight equals to zero --- meilisearch/tests/search/hybrid.rs | 8 ++++---- meilisearch/tests/search/mod.rs | 2 +- meilisearch/tests/search/restrict_searchable.rs | 4 ++-- milli/src/fieldids_weights_map.rs | 4 ++-- milli/src/index.rs | 6 +++--- milli/src/update/settings.rs | 4 ++-- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 68ae4c0aa..67f7909b9 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -85,8 +85,8 @@ async fn simple_search() { ) .await; snapshot!(code, @"200 OK"); - snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###); - snapshot!(response["semanticHitCount"], @"1"); + snapshot!(response["hits"], 
@r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9472135901451112}]"###); + snapshot!(response["semanticHitCount"], @"2"); let (response, code) = index .search_post( @@ -331,7 +331,7 @@ async fn query_combination() { .await; snapshot!(code, @"200 OK"); - snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.996969696969697},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.996969696969697},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.8848484848484849}]"###); + snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":0.9242424242424242}]"###); snapshot!(response["semanticHitCount"], @"null"); // query + vector, no hybrid keyword => @@ -374,6 +374,6 @@ async fn query_combination() { .await; snapshot!(code, @"200 OK"); - snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9848484848484848}]"###); + snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not 
part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_rankingScore":0.9242424242424242}]"###); snapshot!(response["semanticHitCount"], @"0"); } diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index b4350f686..f601e2b03 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -921,7 +921,7 @@ async fn test_score_details() { "order": 3, "attributeRankingOrderScore": 1.0, "queryWordDistanceScore": 0.8095238095238095, - "score": 0.9727891156462584 + "score": 0.8095238095238095 }, "exactness": { "order": 4, diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs index 7bbdca38f..f52efa1f4 100644 --- a/meilisearch/tests/search/restrict_searchable.rs +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -285,10 +285,10 @@ async fn attributes_ranking_rule_order() { @r###" [ { - "id": "2" + "id": "1" }, { - "id": "1" + "id": "2" } ] "### diff --git a/milli/src/fieldids_weights_map.rs b/milli/src/fieldids_weights_map.rs index 5ca2a6146..a737632a4 100644 --- a/milli/src/fieldids_weights_map.rs +++ b/milli/src/fieldids_weights_map.rs @@ -21,9 +21,9 @@ impl FieldidsWeightsMap { /// Create the map from the fields ids maps. /// Should only be called in the case there are NO searchable attributes. - /// The weights and the fields ids will have the same values. + /// All the fields will be inserted in the order of the fields ids map with a weight of 0. pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self { - FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, fid)).collect() } + FieldidsWeightsMap { map: fid_map.ids().map(|fid| (fid, 0)).collect() } } /// Removes a field id from the map, returning the associated weight previously in the map. 
diff --git a/milli/src/index.rs b/milli/src/index.rs index 36f0b339e..42b9cb111 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2492,7 +2492,7 @@ pub(crate) mod tests { db_snap!(index, fieldids_weights_map, @r###" fid weight 0 0 | - 1 1 | + 1 0 | "###); index.delete_documents(Default::default()); @@ -2512,7 +2512,7 @@ pub(crate) mod tests { db_snap!(index, fieldids_weights_map, @r###" fid weight 0 0 | - 1 1 | + 1 0 | "###); index @@ -2537,7 +2537,7 @@ pub(crate) mod tests { db_snap!(index, fieldids_weights_map, @r###" fid weight 0 0 | - 1 1 | + 1 0 | "###); let rtxn = index.read_txn().unwrap(); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 046644dc4..0599bb9d8 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1587,8 +1587,8 @@ mod tests { db_snap!(index, fieldids_weights_map, @r###" fid weight 0 0 | - 1 1 | - 2 2 | + 1 0 | + 2 0 | "###); // Check that the searchable field have been reset and documents are found now. 
From 673b6e1dc0f9ad6d688c5f8da7295d1f4e041c5f Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 16 May 2024 11:28:14 +0200 Subject: [PATCH 23/56] fix a flaky test --- meilisearch/tests/snapshot/mod.rs | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/meilisearch/tests/snapshot/mod.rs b/meilisearch/tests/snapshot/mod.rs index 1312aa9ca..67e80f45b 100644 --- a/meilisearch/tests/snapshot/mod.rs +++ b/meilisearch/tests/snapshot/mod.rs @@ -1,6 +1,5 @@ use std::time::Duration; -use actix_rt::time::sleep; use meili_snap::{json_string, snapshot}; use meilisearch::option::ScheduleSnapshot; use meilisearch::Opt; @@ -53,11 +52,29 @@ async fn perform_snapshot() { index.load_test_set().await; - server.index("test1").create(Some("prim")).await; + let (task, code) = server.index("test1").create(Some("prim")).await; + meili_snap::snapshot!(code, @"202 Accepted"); - index.wait_task(2).await; + index.wait_task(task.uid()).await; - sleep(Duration::from_secs(2)).await; + // wait for the _next task_ to process, aka the snapshot that should be enqueued at some point + + println!("waited for the next task to finish"); + let now = std::time::Instant::now(); + let next_task = task.uid() + 1; + loop { + let (value, code) = index.get_task(next_task).await; + dbg!(&value); + if code != 404 && value["status"].as_str() == Some("succeeded") { + break; + } + + if now.elapsed() > Duration::from_secs(30) { + panic!("The snapshot didn't schedule in 30s even though it was supposed to be scheduled every 2s: {}", + serde_json::to_string_pretty(&value).unwrap() + ); + } + } let temp = tempfile::tempdir().unwrap(); From 8e6ffbfc6f55580784d9322af0453b874fe5cb0e Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 28 Mar 2024 18:22:31 +0100 Subject: [PATCH 24/56] stream documents --- Cargo.lock | 12 +-- meilisearch/Cargo.toml | 1 + meilisearch/src/routes/indexes/documents.rs | 114 ++++++++++++++------ meilisearch/src/routes/mod.rs | 28 +++-- 4 files changed, 107 
insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 937fce64a..5d87830a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3348,6 +3348,7 @@ dependencies = [ "rayon", "regex", "reqwest", + "roaring", "rustls 0.21.12", "rustls-pemfile", "segment", @@ -4416,12 +4417,6 @@ dependencies = [ "winreg", ] -[[package]] -name = "retain_mut" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" - [[package]] name = "ring" version = "0.17.8" @@ -4439,13 +4434,12 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873" +checksum = "a1c77081a55300e016cb86f2864415b7518741879db925b8d488a0ee0d2da6bf" dependencies = [ "bytemuck", "byteorder", - "retain_mut", "serde", ] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index ed62c5f48..612c6731b 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -108,6 +108,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] } tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-actix-web = "0.7.9" build-info = { version = "1.7.0", path = "../build-info" } +roaring = "0.10.3" [dev-dependencies] actix-rt = "2.9.0" diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 43fab1dae..78af7a098 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -1,12 +1,14 @@ -use std::io::ErrorKind; +use std::io::{ErrorKind, Write}; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; use bstr::ByteSlice as _; +use bytes::Bytes; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; 
+use futures_util::Stream; use index_scheduler::{IndexScheduler, TaskId}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; @@ -22,7 +24,9 @@ use meilisearch_types::tasks::KindWithContent; use meilisearch_types::{milli, Document, Index}; use mime::Mime; use once_cell::sync::Lazy; -use serde::Deserialize; +use roaring::RoaringBitmap; +use serde::ser::SerializeSeq; +use serde::{Deserialize, Serialize}; use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; @@ -230,6 +234,34 @@ pub async fn get_documents( documents_by_query(&index_scheduler, index_uid, query) } +pub struct Writer2Streamer { + sender: tokio::sync::mpsc::Sender>, +} + +impl Write for Writer2Streamer { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.sender.blocking_send(Ok(buf.to_vec().into())).map_err(std::io::Error::other)?; + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} + +pub fn stream( + data: impl Serialize + Send + Sync + 'static, +) -> impl Stream> { + let (sender, receiver) = tokio::sync::mpsc::channel::>(1); + + tokio::task::spawn_blocking(move || { + serde_json::to_writer(std::io::BufWriter::new(Writer2Streamer { sender }), &data) + }); + futures_util::stream::unfold(receiver, |mut receiver| async { + receiver.recv().await.map(|value| (value, receiver)) + }) +} + fn documents_by_query( index_scheduler: &IndexScheduler, index_uid: web::Path, @@ -239,12 +271,13 @@ fn documents_by_query( let BrowseQuery { offset, limit, fields, filter } = query; let index = index_scheduler.index(&index_uid)?; - let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?; + let documents = retrieve_documents(index, offset, limit, filter, fields)?; - let ret = PaginationView::new(offset, limit, total as usize, documents); + let ret = PaginationView::new(offset, limit, documents.total_documents as usize, documents); debug!(returns = ?ret, "Get 
documents"); - Ok(HttpResponse::Ok().json(ret)) + + Ok(HttpResponse::Ok().streaming(stream(ret))) } #[derive(Deserialize, Debug, Deserr)] @@ -590,13 +623,46 @@ fn some_documents<'a, 't: 'a>( })) } -fn retrieve_documents>( - index: &Index, +pub struct DocumentsStreamer { + attributes_to_retrieve: Option>, + documents: RoaringBitmap, + index: Index, + pub total_documents: u64, +} + +impl Serialize for DocumentsStreamer { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let rtxn = self.index.read_txn().unwrap(); + + let mut seq = serializer.serialize_seq(Some(self.documents.len() as usize)).unwrap(); + + let documents = some_documents(&self.index, &rtxn, self.documents.iter()).unwrap(); + for document in documents { + let document = document.unwrap(); + let document = match self.attributes_to_retrieve { + Some(ref attributes_to_retrieve) => permissive_json_pointer::select_values( + &document, + attributes_to_retrieve.iter().map(|s| s.as_ref()), + ), + None => document, + }; + + seq.serialize_element(&document)?; + } + seq.end() + } +} + +fn retrieve_documents( + index: Index, offset: usize, limit: usize, filter: Option, - attributes_to_retrieve: Option>, -) -> Result<(u64, Vec), ResponseError> { + attributes_to_retrieve: Option>, +) -> Result { let rtxn = index.read_txn()?; let filter = &filter; let filter = if let Some(filter) = filter { @@ -607,7 +673,7 @@ fn retrieve_documents>( }; let candidates = if let Some(filter) = filter { - filter.evaluate(&rtxn, index).map_err(|err| match err { + filter.evaluate(&rtxn, &index).map_err(|err| match err { milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter) } @@ -616,28 +682,14 @@ fn retrieve_documents>( } else { index.documents_ids(&rtxn)? 
}; + drop(rtxn); - let (it, number_of_documents) = { - let number_of_documents = candidates.len(); - ( - some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?, - number_of_documents, - ) - }; - - let documents: Result, ResponseError> = it - .map(|document| { - Ok(match &attributes_to_retrieve { - Some(attributes_to_retrieve) => permissive_json_pointer::select_values( - &document?, - attributes_to_retrieve.iter().map(|s| s.as_ref()), - ), - None => document?, - }) - }) - .collect(); - - Ok((number_of_documents, documents?)) + Ok(DocumentsStreamer { + total_documents: candidates.len(), + attributes_to_retrieve, + documents: candidates.into_iter().skip(offset).take(limit).collect(), + index, + }) } fn retrieve_document>( diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index c25aeee70..a7e84d19c 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -1,4 +1,5 @@ use std::collections::BTreeMap; +use std::fmt; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; @@ -124,20 +125,31 @@ pub struct Pagination { pub limit: usize, } -#[derive(Debug, Clone, Serialize)] -pub struct PaginationView { - pub results: Vec, +#[derive(Clone, Serialize)] +pub struct PaginationView { + pub results: T, pub offset: usize, pub limit: usize, pub total: usize, } +impl fmt::Debug for PaginationView { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("PaginationView") + .field("offset", &self.offset) + .field("limit", &self.limit) + .field("total", &self.total) + .field("results", &"[...]") + .finish() + } +} + impl Pagination { /// Given the full data to paginate, returns the selected section. 
pub fn auto_paginate_sized( self, content: impl IntoIterator + ExactSizeIterator, - ) -> PaginationView + ) -> PaginationView> where T: Serialize, { @@ -151,7 +163,7 @@ impl Pagination { self, total: usize, content: impl IntoIterator, - ) -> PaginationView + ) -> PaginationView> where T: Serialize, { @@ -161,7 +173,7 @@ impl Pagination { /// Given the data already paginated + the total number of elements, it stores /// everything in a [PaginationResult]. - pub fn format_with(self, total: usize, results: Vec) -> PaginationView + pub fn format_with(self, total: usize, results: Vec) -> PaginationView> where T: Serialize, { @@ -169,8 +181,8 @@ impl Pagination { } } -impl PaginationView { - pub fn new(offset: usize, limit: usize, total: usize, results: Vec) -> Self { +impl PaginationView { + pub fn new(offset: usize, limit: usize, total: usize, results: T) -> Self { Self { offset, limit, results, total } } } From c85d1752dd3937ffdfc8f86f16108bfa9388aaac Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 18 Apr 2024 15:51:46 +0200 Subject: [PATCH 25/56] keep the same rtxn to compute the filters on the documents and to stream the documents later on --- meilisearch/src/routes/indexes/documents.rs | 28 +++++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 78af7a098..9d34fcdfe 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -1,4 +1,5 @@ use std::io::{ErrorKind, Write}; +use std::pin::Pin; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; @@ -250,7 +251,7 @@ impl Write for Writer2Streamer { } pub fn stream( - data: impl Serialize + Send + Sync + 'static, + data: impl Serialize + Send + 'static, ) -> impl Stream> { let (sender, receiver) = tokio::sync::mpsc::channel::>(1); @@ -626,20 +627,31 @@ fn some_documents<'a, 't: 'a>( pub struct DocumentsStreamer { 
attributes_to_retrieve: Option>, documents: RoaringBitmap, - index: Index, + // safety: The `rtxn` contains a reference to the index thus: + // - The `rtxn` MUST BE dropped before the index. + // - The index MUST BE `Pin`ned in RAM and never moved. + rtxn: Option>, + index: Pin>, pub total_documents: u64, } +impl Drop for DocumentsStreamer { + fn drop(&mut self) { + // safety: we drop the rtxn before the index + self.rtxn = None; + } +} + impl Serialize for DocumentsStreamer { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - let rtxn = self.index.read_txn().unwrap(); + let rtxn = self.rtxn.as_ref().unwrap(); let mut seq = serializer.serialize_seq(Some(self.documents.len() as usize)).unwrap(); - let documents = some_documents(&self.index, &rtxn, self.documents.iter()).unwrap(); + let documents = some_documents(&self.index, rtxn, self.documents.iter()).unwrap(); for document in documents { let document = document.unwrap(); let document = match self.attributes_to_retrieve { @@ -663,7 +675,10 @@ fn retrieve_documents( filter: Option, attributes_to_retrieve: Option>, ) -> Result { + // safety: The index MUST NOT move while we hold the `rtxn` on it + let index = Box::pin(index); let rtxn = index.read_txn()?; + let filter = &filter; let filter = if let Some(filter) = filter { parse_filter(filter) @@ -682,12 +697,15 @@ fn retrieve_documents( } else { index.documents_ids(&rtxn)? }; - drop(rtxn); Ok(DocumentsStreamer { total_documents: candidates.len(), attributes_to_retrieve, documents: candidates.into_iter().skip(offset).take(limit).collect(), + // safety: It is safe to make the lifetime in the Rtxn static because it points to the index right below. + // The index is `Pin`ned on the RAM and won't move even if the structure is moved. + // The `rtxn` is held in an `Option`, so we're able to drop it before dropping the index. 
+ rtxn: Some(unsafe { std::mem::transmute(rtxn) }), index, }) } From 897d25780ef7a442d4bc1ac2599eea49dcf75448 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 16 May 2024 16:10:55 +0200 Subject: [PATCH 26/56] update milli to latest version --- Cargo.lock | 26 +++++-------------- index-scheduler/src/batch.rs | 10 ++++--- index-scheduler/src/lib.rs | 14 +++++----- meilisearch-auth/src/store.rs | 2 +- meilisearch-types/src/error.rs | 1 - meilitool/src/main.rs | 8 ++---- milli/Cargo.toml | 10 +++++-- milli/fuzz/.gitignore | 3 +++ milli/src/error.rs | 3 --- milli/src/index.rs | 7 ++++- milli/src/update/facet/mod.rs | 2 +- milli/src/update/index_documents/mod.rs | 2 +- .../src/update/index_documents/typed_chunk.rs | 3 +-- 13 files changed, 44 insertions(+), 47 deletions(-) create mode 100644 milli/fuzz/.gitignore diff --git a/Cargo.lock b/Cargo.lock index 5d87830a5..7df0e7e86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -378,9 +378,7 @@ dependencies = [ [[package]] name = "arroy" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efddeb1e7c32a551cc07ef4c3e181e3cd5478fdaf4f0bd799983171c1f6efe57" +version = "0.3.0" dependencies = [ "bytemuck", "byteorder", @@ -1536,9 +1534,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "doxygen-rs" -version = "0.2.2" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505" +checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" dependencies = [ "phf", ] @@ -2262,12 +2260,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" -version = "0.20.0-alpha.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934" +version = "0.20.0" dependencies = [ "bitflags 2.5.0", - "bytemuck", 
"byteorder", "heed-traits", "heed-types", @@ -2281,15 +2276,11 @@ dependencies = [ [[package]] name = "heed-traits" -version = "0.20.0-alpha.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298" +version = "0.20.0" [[package]] name = "heed-types" -version = "0.20.0-alpha.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a" +version = "0.20.0" dependencies = [ "bincode", "byteorder", @@ -3189,14 +3180,11 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da" [[package]] name = "lmdb-master-sys" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd" +version = "0.2.0" dependencies = [ "cc", "doxygen-rs", "libc", - "pkg-config", ] [[package]] diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index bc9823a01..582497c15 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -785,10 +785,12 @@ impl IndexScheduler { let dst = temp_snapshot_dir.path().join("auth"); fs::create_dir_all(&dst)?; // TODO We can't use the open_auth_store_env function here but we should - let auth = milli::heed::EnvOpenOptions::new() - .map_size(1024 * 1024 * 1024) // 1 GiB - .max_dbs(2) - .open(&self.auth_path)?; + let auth = unsafe { + milli::heed::EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) // 1 GiB + .max_dbs(2) + .open(&self.auth_path) + }?; auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; // 5. 
Copy and tarball the flat snapshot diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 5704f5354..dd2b296f6 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -453,10 +453,12 @@ impl IndexScheduler { ) }; - let env = heed::EnvOpenOptions::new() - .max_dbs(11) - .map_size(budget.task_db_size) - .open(options.tasks_path)?; + let env = unsafe { + heed::EnvOpenOptions::new() + .max_dbs(11) + .map_size(budget.task_db_size) + .open(options.tasks_path) + }?; let features = features::FeatureData::new(&env, options.instance_features)?; @@ -585,9 +587,9 @@ impl IndexScheduler { } fn is_good_heed(tasks_path: &Path, map_size: usize) -> bool { - if let Ok(env) = + if let Ok(env) = unsafe { heed::EnvOpenOptions::new().map_size(clamp_to_page_size(map_size)).open(tasks_path) - { + } { env.prepare_for_closing().wait(); true } else { diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 1eebd3fe9..ef992e836 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -49,7 +49,7 @@ pub fn open_auth_store_env(path: &Path) -> milli::heed::Result let mut options = EnvOpenOptions::new(); options.map_size(AUTH_STORE_SIZE); // 1GB options.max_dbs(2); - options.open(path) + unsafe { options.open(path) } } impl HeedAuthStore { diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index eea012331..158dfae92 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -423,7 +423,6 @@ impl ErrorCode for HeedError { HeedError::Mdb(_) | HeedError::Encoding(_) | HeedError::Decoding(_) - | HeedError::InvalidDatabaseTyping | HeedError::DatabaseClosing | HeedError::BadOpenOptions { .. } => Code::Internal, } diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index bfcbfdd6d..06c4890a5 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -80,9 +80,7 @@ fn main() -> anyhow::Result<()> { /// Clears the task queue located at `db_path`. 
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { let path = db_path.join("tasks"); - let env = EnvOpenOptions::new() - .max_dbs(100) - .open(&path) + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) } .with_context(|| format!("While trying to open {:?}", path.display()))?; eprintln!("Deleting tasks from the database..."); @@ -193,9 +191,7 @@ fn export_a_dump( FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; let index_scheduler_path = db_path.join("tasks"); - let env = EnvOpenOptions::new() - .max_dbs(100) - .open(&index_scheduler_path) + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; eprintln!("Dumping the keys..."); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 7d903178b..ab63a1fa7 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -30,7 +30,12 @@ grenad = { version = "0.4.6", default-features = false, features = [ "rayon", "tempfile", ] } -heed = { version = "0.20.0-alpha.9", default-features = false, features = [ +# heed = { version = "0.20.0", default-features = false, features = [ +# "serde-json", +# "serde-bincode", +# "read-txn-no-tls", +# ] } +heed = { path = "../../heed/heed", default-features = false, features = [ "serde-json", "serde-bincode", "read-txn-no-tls", @@ -82,7 +87,8 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", ] } tiktoken-rs = "0.5.8" liquid = "0.26.4" -arroy = "0.2.0" +# arroy = "0.2.0" +arroy = { path = "../../arroy" } rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.9.7", features = ["json"] } diff --git a/milli/fuzz/.gitignore b/milli/fuzz/.gitignore new file mode 100644 index 000000000..a0925114d --- /dev/null +++ b/milli/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/milli/src/error.rs b/milli/src/error.rs index 009781fcf..6db0dcac1 100644 --- a/milli/src/error.rs 
+++ b/milli/src/error.rs @@ -48,8 +48,6 @@ pub enum InternalError { GrenadInvalidFormatVersion, #[error("Invalid merge while processing {process}")] IndexingMergingKeys { process: &'static str }, - #[error("{}", HeedError::InvalidDatabaseTyping)] - InvalidDatabaseTyping, #[error(transparent)] RayonThreadPool(#[from] ThreadPoolBuildError), #[error(transparent)] @@ -429,7 +427,6 @@ impl From for Error { // TODO use the encoding HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })), HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })), - HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), HeedError::DatabaseClosing => InternalError(DatabaseClosing), HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions), } diff --git a/milli/src/index.rs b/milli/src/index.rs index 42b9cb111..739a7f202 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -184,7 +184,7 @@ impl Index { options.max_dbs(25); - let env = options.open(path)?; + let env = unsafe { options.open(path) }?; let mut wtxn = env.write_txn()?; let main = env.database_options().name(MAIN).create(&mut wtxn)?; let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; @@ -294,6 +294,11 @@ impl Index { self.env.read_txn() } + /// Create a static read transaction to be able to read the index without keeping a reference to it. + pub fn static_read_txn(&self) -> heed::Result> { + self.env.clone().static_read_txn() + } + /// Returns the canonicalized path where the heed `Env` of this `Index` lives. 
pub fn path(&self) -> &Path { self.env.path() diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 0af64c4c5..42994551f 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -379,7 +379,7 @@ pub(crate) mod test_helpers { let mut options = heed::EnvOpenOptions::new(); let options = options.map_size(4096 * 4 * 1000 * 100); let tempdir = tempfile::TempDir::new().unwrap(); - let env = options.open(tempdir.path()).unwrap(); + let env = unsafe { options.open(tempdir.path()) }.unwrap(); let mut wtxn = env.write_txn().unwrap(); let content = env.create_database(&mut wtxn, None).unwrap(); wtxn.commit().unwrap(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 936ce1efc..4d2fac7cb 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -556,7 +556,7 @@ where let writer_index = (embedder_index as u16) << 8; for k in 0..=u8::MAX { let writer = - arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension)?; + arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension); if writer.is_empty(wtxn)? 
{ break; } diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 6aad290e5..e0de2d5a1 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -661,7 +661,7 @@ pub(crate) fn write_typed_chunk_into_index( )?; let writer_index = (embedder_index as u16) << 8; // FIXME: allow customizing distance - let writers: std::result::Result, _> = (0..=u8::MAX) + let writers: Vec<_> = (0..=u8::MAX) .map(|k| { arroy::Writer::new( index.vector_arroy, @@ -670,7 +670,6 @@ pub(crate) fn write_typed_chunk_into_index( ) }) .collect(); - let writers = writers?; // remove vectors for docids we want them removed let merger = remove_vectors_builder.build(); From 273c6e8c5c28573af67b44d7d1f13a043a7b7915 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 16 May 2024 16:11:08 +0200 Subject: [PATCH 27/56] uses the latest version of heed to get rid of unsafe code --- Cargo.lock | 14 +++++++++-- meilisearch/src/routes/indexes/documents.rs | 28 ++++----------------- milli/Cargo.toml | 10 ++------ 3 files changed, 19 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7df0e7e86..d9e96b029 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -378,7 +378,9 @@ dependencies = [ [[package]] name = "arroy" -version = "0.3.0" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9" dependencies = [ "bytemuck", "byteorder", @@ -2260,7 +2262,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" -version = "0.20.0" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd" dependencies = [ "bitflags 2.5.0", "byteorder", @@ -2277,10 +2281,14 @@ dependencies = [ [[package]] name = "heed-traits" version = "0.20.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff" [[package]] name = "heed-types" version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cb0d6ba3700c9a57e83c013693e3eddb68a6d9b6781cacafc62a0d992e8ddb3" dependencies = [ "bincode", "byteorder", @@ -3181,6 +3189,8 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da" [[package]] name = "lmdb-master-sys" version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a" dependencies = [ "cc", "doxygen-rs", diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 9d34fcdfe..7c9b4b761 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -1,5 +1,4 @@ use std::io::{ErrorKind, Write}; -use std::pin::Pin; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; @@ -627,31 +626,19 @@ fn some_documents<'a, 't: 'a>( pub struct DocumentsStreamer { attributes_to_retrieve: Option>, documents: RoaringBitmap, - // safety: The `rtxn` contains a reference to the index thus: - // - The `rtxn` MUST BE dropped before the index. - // - The index MUST BE `Pin`ned in RAM and never moved. 
- rtxn: Option>, - index: Pin>, + rtxn: RoTxn<'static>, + index: Index, pub total_documents: u64, } -impl Drop for DocumentsStreamer { - fn drop(&mut self) { - // safety: we drop the rtxn before the index - self.rtxn = None; - } -} - impl Serialize for DocumentsStreamer { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { - let rtxn = self.rtxn.as_ref().unwrap(); - let mut seq = serializer.serialize_seq(Some(self.documents.len() as usize)).unwrap(); - let documents = some_documents(&self.index, rtxn, self.documents.iter()).unwrap(); + let documents = some_documents(&self.index, &self.rtxn, self.documents.iter()).unwrap(); for document in documents { let document = document.unwrap(); let document = match self.attributes_to_retrieve { @@ -675,9 +662,7 @@ fn retrieve_documents( filter: Option, attributes_to_retrieve: Option>, ) -> Result { - // safety: The index MUST NOT move while we hold the `rtxn` on it - let index = Box::pin(index); - let rtxn = index.read_txn()?; + let rtxn = index.static_read_txn()?; let filter = &filter; let filter = if let Some(filter) = filter { @@ -702,10 +687,7 @@ fn retrieve_documents( total_documents: candidates.len(), attributes_to_retrieve, documents: candidates.into_iter().skip(offset).take(limit).collect(), - // safety: It is safe to make the lifetime in the Rtxn static because it points to the index right below. - // The index is `Pin`ned on the RAM and won't move even if the structure is moved. - // The `rtxn` is held in an `Option`, so we're able to drop it before dropping the index. 
- rtxn: Some(unsafe { std::mem::transmute(rtxn) }), + rtxn, index, }) } diff --git a/milli/Cargo.toml b/milli/Cargo.toml index ab63a1fa7..c5dddd0fd 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -30,12 +30,7 @@ grenad = { version = "0.4.6", default-features = false, features = [ "rayon", "tempfile", ] } -# heed = { version = "0.20.0", default-features = false, features = [ -# "serde-json", -# "serde-bincode", -# "read-txn-no-tls", -# ] } -heed = { path = "../../heed/heed", default-features = false, features = [ +heed = { version = "0.20.1", default-features = false, features = [ "serde-json", "serde-bincode", "read-txn-no-tls", @@ -87,8 +82,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", ] } tiktoken-rs = "0.5.8" liquid = "0.26.4" -# arroy = "0.2.0" -arroy = { path = "../../arroy" } +arroy = "0.3.1" rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.9.7", features = ["json"] } From 98c811247e1d8c92523f8f933383063f6e009d5a Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 14 May 2024 11:22:16 +0200 Subject: [PATCH 28/56] Add parsed vectors module --- milli/src/vector/mod.rs | 1 + milli/src/vector/parsed_vectors.rs | 149 +++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+) create mode 100644 milli/src/vector/parsed_vectors.rs diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 306c1c1e9..d3d05a1c1 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -13,6 +13,7 @@ pub mod error; pub mod hf; pub mod manual; pub mod openai; +pub mod parsed_vectors; pub mod settings; pub mod ollama; diff --git a/milli/src/vector/parsed_vectors.rs b/milli/src/vector/parsed_vectors.rs new file mode 100644 index 000000000..bf4b9ea83 --- /dev/null +++ b/milli/src/vector/parsed_vectors.rs @@ -0,0 +1,149 @@ +use std::collections::BTreeMap; + +use obkv::KvReader; +use serde_json::{from_slice, Value}; + +use super::Embedding; +use crate::update::del_add::{DelAdd, KvReaderDelAdd}; +use 
crate::{FieldId, InternalError, UserError}; + +pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors"; + +#[derive(serde::Serialize, serde::Deserialize, Debug)] +#[serde(untagged)] +pub enum Vectors { + ImplicitlyUserProvided(VectorOrArrayOfVectors), + Explicit(ExplicitVectors), +} + +impl Vectors { + pub fn into_array_of_vectors(self) -> Vec { + match self { + Vectors::ImplicitlyUserProvided(embeddings) + | Vectors::Explicit(ExplicitVectors { embeddings, user_provided: _ }) => { + embeddings.into_array_of_vectors().unwrap_or_default() + } + } + } +} + +#[derive(serde::Serialize, serde::Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct ExplicitVectors { + pub embeddings: VectorOrArrayOfVectors, + pub user_provided: bool, +} + +pub struct ParsedVectorsDiff { + pub old: Option>, + pub new: Option>, +} + +impl ParsedVectorsDiff { + pub fn new( + documents_diff: KvReader<'_, FieldId>, + old_vectors_fid: Option, + new_vectors_fid: Option, + ) -> Result { + let old = match old_vectors_fid + .and_then(|vectors_fid| documents_diff.get(vectors_fid)) + .map(KvReaderDelAdd::new) + .map(|obkv| to_vector_map(obkv, DelAdd::Deletion)) + .transpose() + { + Ok(del) => del, + // ignore wrong shape for old version of documents, use an empty map in this case + Err(Error::InvalidMap(value)) => { + tracing::warn!(%value, "Previous version of the `_vectors` field had a wrong shape"); + Default::default() + } + Err(error) => { + return Err(error); + } + } + .flatten(); + let new = new_vectors_fid + .and_then(|vectors_fid| documents_diff.get(vectors_fid)) + .map(KvReaderDelAdd::new) + .map(|obkv| to_vector_map(obkv, DelAdd::Addition)) + .transpose()? 
+ .flatten(); + Ok(Self { old, new }) + } + + pub fn remove(&mut self, embedder_name: &str) -> (Option, Option) { + let old = self.old.as_mut().and_then(|old| old.remove(embedder_name)); + let new = self.new.as_mut().and_then(|new| new.remove(embedder_name)); + (old, new) + } +} + +pub struct ParsedVectors(pub BTreeMap); + +impl ParsedVectors { + pub fn from_bytes(value: &[u8]) -> Result { + let Ok(value) = from_slice(value) else { + let value = from_slice(value).map_err(Error::InternalSerdeJson)?; + return Err(Error::InvalidMap(value)); + }; + Ok(ParsedVectors(value)) + } + + pub fn retain_user_provided_vectors(&mut self) { + self.0.retain(|_k, v| match v { + Vectors::ImplicitlyUserProvided(_) => true, + Vectors::Explicit(ExplicitVectors { embeddings: _, user_provided }) => *user_provided, + }); + } +} + +pub enum Error { + InvalidMap(Value), + InternalSerdeJson(serde_json::Error), +} + +impl Error { + pub fn to_crate_error(self, document_id: String) -> crate::Error { + match self { + Error::InvalidMap(value) => { + crate::Error::UserError(UserError::InvalidVectorsMapType { document_id, value }) + } + Error::InternalSerdeJson(error) => { + crate::Error::InternalError(InternalError::SerdeJson(error)) + } + } + } +} + +fn to_vector_map( + obkv: KvReaderDelAdd, + side: DelAdd, +) -> Result>, Error> { + Ok(if let Some(value) = obkv.get(side) { + let ParsedVectors(parsed_vectors) = ParsedVectors::from_bytes(value)?; + Some(parsed_vectors) + } else { + None + }) +} + +/// Represents either a vector or an array of multiple vectors. +#[derive(serde::Serialize, serde::Deserialize, Debug)] +#[serde(transparent)] +pub struct VectorOrArrayOfVectors { + #[serde(with = "either::serde_untagged_optional")] + inner: Option>>, +} + +impl VectorOrArrayOfVectors { + pub fn into_array_of_vectors(self) -> Option> { + match self.inner? 
{ + either::Either::Left(vector) => Some(vec![vector]), + either::Either::Right(vectors) => Some(vectors), + } + } + + pub fn from_array_of_vectors(array_of_vec: Vec) -> Self { + Self { inner: Some(either::Either::Right(array_of_vec)) } + } +} From 261de888b71a3ba4bc891b09e30715e68bf8a812 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 14 May 2024 11:38:28 +0200 Subject: [PATCH 29/56] Add function to get the embeddings of a document in an index --- milli/src/index.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/milli/src/index.rs b/milli/src/index.rs index 739a7f202..66cd6f3cc 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1606,6 +1606,44 @@ impl Index { pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result { self.main.remap_key_type::().delete(wtxn, main_key::SEARCH_CUTOFF) } + + pub fn embeddings( + &self, + rtxn: &RoTxn<'_>, + docid: DocumentId, + ) -> Result>> { + let mut res = BTreeMap::new(); + for row in self.embedder_category_id.iter(rtxn)? 
{ + let (embedder_name, embedder_id) = row?; + let embedder_id = (embedder_id as u16) << 8; + let mut embeddings = Vec::new(); + 'vectors: for i in 0..=u8::MAX { + let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy) + .map(Some) + .or_else(|e| match e { + arroy::Error::MissingMetadata => Ok(None), + e => Err(e), + }) + .transpose(); + + let Some(reader) = reader else { + break 'vectors; + }; + + let embedding = reader?.item_vector(rtxn, docid)?; + if let Some(embedding) = embedding { + embeddings.push(embedding) + } else { + break 'vectors; + } + } + + if !embeddings.is_empty() { + res.insert(embedder_name.to_owned(), embeddings); + } + } + Ok(res) + } } #[cfg(test)] From 52d9cb6e5af5dcfe23638354f3b124a0371b007d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 14 May 2024 11:42:26 +0200 Subject: [PATCH 30/56] Refactor vector indexing - use the parsed_vectors module - only parse `_vectors` once per document, instead of once per embedder per document --- milli/src/error.rs | 2 +- milli/src/lib.rs | 29 -- .../extract/extract_vector_points.rs | 373 +++++++++--------- .../src/update/index_documents/extract/mod.rs | 46 +-- milli/src/vector/mod.rs | 4 + 5 files changed, 218 insertions(+), 236 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index 6db0dcac1..e60252ec1 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -120,7 +120,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco #[error("The `_vectors.{subfield}` field in the document with id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")] InvalidVectorsType { document_id: Value, value: Value, subfield: String }, #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. 
Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")] - InvalidVectorsMapType { document_id: Value, value: Value }, + InvalidVectorsMapType { document_id: String, value: Value }, #[error("{0}")] InvalidFilter(String), #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))] diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 881633b5c..f6b86f14a 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -362,35 +362,6 @@ pub fn normalize_facet(original: &str) -> String { CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase() } -/// Represents either a vector or an array of multiple vectors. -#[derive(serde::Serialize, serde::Deserialize, Debug)] -#[serde(transparent)] -pub struct VectorOrArrayOfVectors { - #[serde(with = "either::serde_untagged_optional")] - inner: Option, Vec>>>, -} - -impl VectorOrArrayOfVectors { - pub fn into_array_of_vectors(self) -> Option>> { - match self.inner? { - either::Either::Left(vector) => Some(vec![vector]), - either::Either::Right(vectors) => Some(vectors), - } - } -} - -/// Normalize a vector by dividing the dimensions by the length of it. 
-pub fn normalize_vector(mut vector: Vec) -> Vec { - let squared: f32 = vector.iter().map(|x| x * x).sum(); - let length = squared.sqrt(); - if length <= f32::EPSILON { - vector - } else { - vector.iter_mut().for_each(|x| *x /= length); - vector - } -} - #[cfg(test)] mod tests { use serde_json::json; diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 322fa3725..8b78a8c55 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -10,16 +10,16 @@ use bytemuck::cast_slice; use grenad::Writer; use itertools::EitherOrBoth; use ordered_float::OrderedFloat; -use serde_json::{from_slice, Value}; +use serde_json::Value; use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; -use crate::error::UserError; use crate::prompt::Prompt; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::index_documents::helpers::try_split_at; use crate::update::settings::InnerIndexSettingsDiff; +use crate::vector::parsed_vectors::{ParsedVectorsDiff, RESERVED_VECTORS_FIELD_NAME}; use crate::vector::Embedder; -use crate::{DocumentId, InternalError, Result, ThreadPoolNoAbort, VectorOrArrayOfVectors}; +use crate::{DocumentId, Result, ThreadPoolNoAbort}; /// The length of the elements that are always in the buffer when inserting new values. 
const TRUNCATE_SIZE: usize = size_of::(); @@ -31,6 +31,10 @@ pub struct ExtractedVectorPoints { pub remove_vectors: grenad::Reader>, // docid -> prompt pub prompts: grenad::Reader>, + + // embedder + pub embedder_name: String, + pub embedder: Arc, } enum VectorStateDelta { @@ -65,6 +69,19 @@ impl VectorStateDelta { } } +struct EmbedderVectorExtractor { + embedder_name: String, + embedder: Arc, + prompt: Arc, + + // (docid, _index) -> KvWriterDelAdd -> Vector + manual_vectors_writer: Writer>, + // (docid) -> (prompt) + prompts_writer: Writer>, + // (docid) -> () + remove_vectors_writer: Writer>, +} + /// Extracts the embedding vector contained in each document under the `_vectors` field. /// /// Returns the generated grenad reader containing the docid as key associated to the Vec @@ -72,35 +89,55 @@ impl VectorStateDelta { pub fn extract_vector_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, - settings_diff: &InnerIndexSettingsDiff, - prompt: &Prompt, - embedder_name: &str, -) -> Result { + settings_diff: Arc, +) -> Result> { puffin::profile_function!(); + let reindex_vectors = settings_diff.reindex_vectors(); + let old_fields_ids_map = &settings_diff.old.fields_ids_map; let new_fields_ids_map = &settings_diff.new.fields_ids_map; + // the vector field id may have changed + let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); + // filter the old vector fid if the settings has been changed forcing reindexing. 
+ let old_vectors_fid = old_vectors_fid.filter(|_| !reindex_vectors); - // (docid, _index) -> KvWriterDelAdd -> Vector - let mut manual_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + let new_vectors_fid = new_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); - // (docid) -> (prompt) - let mut prompts_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + let mut extractors = Vec::new(); + for (embedder_name, (embedder, prompt)) in + settings_diff.new.embedding_configs.clone().into_iter() + { + // (docid, _index) -> KvWriterDelAdd -> Vector + let manual_vectors_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); - // (docid) -> () - let mut remove_vectors_writer = create_writer( - indexer.chunk_compression_type, - indexer.chunk_compression_level, - tempfile::tempfile()?, - ); + // (docid) -> (prompt) + let prompts_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); + + // (docid) -> () + let remove_vectors_writer = create_writer( + indexer.chunk_compression_type, + indexer.chunk_compression_level, + tempfile::tempfile()?, + ); + + extractors.push(EmbedderVectorExtractor { + embedder_name, + embedder, + prompt, + manual_vectors_writer, + prompts_writer, + remove_vectors_writer, + }); + } let mut key_buffer = Vec::new(); let mut cursor = obkv_documents.into_cursor()?; @@ -114,152 +151,140 @@ pub fn extract_vector_points( key_buffer.clear(); key_buffer.extend_from_slice(docid_bytes); - // since we only needs the primary key when we throw an error we create this getter to + // since we only need the primary key when we throw an error we create this getter to // lazily get it when needed let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() }; - // the 
vector field id may have changed - let old_vectors_fid = old_fields_ids_map.id("_vectors"); - // filter the old vector fid if the settings has been changed forcing reindexing. - let old_vectors_fid = old_vectors_fid.filter(|_| !settings_diff.reindex_vectors()); + let mut parsed_vectors = ParsedVectorsDiff::new(obkv, old_vectors_fid, new_vectors_fid) + .map_err(|error| error.to_crate_error(document_id().to_string()))?; - let new_vectors_fid = new_fields_ids_map.id("_vectors"); - let vectors_field = { - let del = old_vectors_fid - .and_then(|vectors_fid| obkv.get(vectors_fid)) - .map(KvReaderDelAdd::new) - .map(|obkv| to_vector_map(obkv, DelAdd::Deletion, &document_id)) - .transpose()? - .flatten(); - let add = new_vectors_fid - .and_then(|vectors_fid| obkv.get(vectors_fid)) - .map(KvReaderDelAdd::new) - .map(|obkv| to_vector_map(obkv, DelAdd::Addition, &document_id)) - .transpose()? - .flatten(); - (del, add) - }; + for EmbedderVectorExtractor { + embedder_name, + embedder: _, + prompt, + manual_vectors_writer, + prompts_writer, + remove_vectors_writer, + } in extractors.iter_mut() + { + let delta = match parsed_vectors.remove(embedder_name) { + (Some(old), Some(new)) => { + // no autogeneration + let del_vectors = old.into_array_of_vectors(); + let add_vectors = new.into_array_of_vectors(); - let (del_map, add_map) = vectors_field; - - let del_value = del_map.and_then(|mut map| map.remove(embedder_name)); - let add_value = add_map.and_then(|mut map| map.remove(embedder_name)); - - let delta = match (del_value, add_value) { - (Some(old), Some(new)) => { - // no autogeneration - let del_vectors = extract_vectors(old, document_id, embedder_name)?; - let add_vectors = extract_vectors(new, document_id, embedder_name)?; - - if add_vectors.len() > usize::from(u8::MAX) { - return Err(crate::Error::UserError(crate::UserError::TooManyVectors( - document_id().to_string(), - add_vectors.len(), - ))); - } - - VectorStateDelta::ManualDelta(del_vectors, add_vectors) - } - 
(Some(_old), None) => { - // Do we keep this document? - let document_is_kept = obkv - .iter() - .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) - .any(|deladd| deladd.get(DelAdd::Addition).is_some()); - if document_is_kept { - // becomes autogenerated - VectorStateDelta::NowGenerated(prompt.render( - obkv, - DelAdd::Addition, - new_fields_ids_map, - )?) - } else { - VectorStateDelta::NowRemoved - } - } - (None, Some(new)) => { - // was possibly autogenerated, remove all vectors for that document - let add_vectors = extract_vectors(new, document_id, embedder_name)?; - if add_vectors.len() > usize::from(u8::MAX) { - return Err(crate::Error::UserError(crate::UserError::TooManyVectors( - document_id().to_string(), - add_vectors.len(), - ))); - } - - VectorStateDelta::WasGeneratedNowManual(add_vectors) - } - (None, None) => { - // Do we keep this document? - let document_is_kept = obkv - .iter() - .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) - .any(|deladd| deladd.get(DelAdd::Addition).is_some()); - - if document_is_kept { - // Don't give up if the old prompt was failing - let old_prompt = Some(prompt) - // TODO: this filter works because we erase the vec database when a embedding setting changes. - // When vector pipeline will be optimized, this should be removed. 
- .filter(|_| !settings_diff.reindex_vectors()) - .map(|p| { - p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default() - }); - let new_prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; - if old_prompt.as_ref() != Some(&new_prompt) { - let old_prompt = old_prompt.unwrap_or_default(); - tracing::trace!( - "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" - ); - VectorStateDelta::NowGenerated(new_prompt) - } else { - tracing::trace!("⏭️ Prompt unmodified, skipping"); - VectorStateDelta::NoChange + if add_vectors.len() > usize::from(u8::MAX) { + return Err(crate::Error::UserError(crate::UserError::TooManyVectors( + document_id().to_string(), + add_vectors.len(), + ))); } - } else { - VectorStateDelta::NowRemoved - } - } - }; - // and we finally push the unique vectors into the writer - push_vectors_diff( - &mut remove_vectors_writer, - &mut prompts_writer, - &mut manual_vectors_writer, - &mut key_buffer, - delta, - settings_diff, - )?; + VectorStateDelta::ManualDelta(del_vectors, add_vectors) + } + (Some(_old), None) => { + // Do we keep this document? + let document_is_kept = obkv + .iter() + .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) + .any(|deladd| deladd.get(DelAdd::Addition).is_some()); + if document_is_kept { + // becomes autogenerated + VectorStateDelta::NowGenerated(prompt.render( + obkv, + DelAdd::Addition, + new_fields_ids_map, + )?) + } else { + VectorStateDelta::NowRemoved + } + } + (None, Some(new)) => { + // was possibly autogenerated, remove all vectors for that document + let add_vectors = new.into_array_of_vectors(); + if add_vectors.len() > usize::from(u8::MAX) { + return Err(crate::Error::UserError(crate::UserError::TooManyVectors( + document_id().to_string(), + add_vectors.len(), + ))); + } + + VectorStateDelta::WasGeneratedNowManual(add_vectors) + } + (None, None) => { + // Do we keep this document? 
+ let document_is_kept = obkv + .iter() + .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) + .any(|deladd| deladd.get(DelAdd::Addition).is_some()); + + if document_is_kept { + // Don't give up if the old prompt was failing + let old_prompt = Some(&prompt) + // TODO: this filter works because we erase the vec database when a embedding setting changes. + // When vector pipeline will be optimized, this should be removed. + .filter(|_| !settings_diff.reindex_vectors()) + .map(|p| { + p.render(obkv, DelAdd::Deletion, old_fields_ids_map) + .unwrap_or_default() + }); + let new_prompt = + prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?; + if old_prompt.as_ref() != Some(&new_prompt) { + let old_prompt = old_prompt.unwrap_or_default(); + tracing::trace!( + "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}" + ); + VectorStateDelta::NowGenerated(new_prompt) + } else { + tracing::trace!("⏭️ Prompt unmodified, skipping"); + VectorStateDelta::NoChange + } + } else { + VectorStateDelta::NowRemoved + } + } + }; + + // and we finally push the unique vectors into the writer + push_vectors_diff( + remove_vectors_writer, + prompts_writer, + manual_vectors_writer, + &mut key_buffer, + delta, + reindex_vectors, + )?; + } } - Ok(ExtractedVectorPoints { - // docid, _index -> KvWriterDelAdd -> Vector - manual_vectors: writer_into_reader(manual_vectors_writer)?, - // docid -> () - remove_vectors: writer_into_reader(remove_vectors_writer)?, - // docid -> prompt - prompts: writer_into_reader(prompts_writer)?, - }) -} + ///// -fn to_vector_map( - obkv: KvReaderDelAdd, - side: DelAdd, - document_id: &impl Fn() -> Value, -) -> Result>> { - Ok(if let Some(value) = obkv.get(side) { - let Ok(value) = from_slice(value) else { - let value = from_slice(value).map_err(InternalError::SerdeJson)?; - return Err(crate::Error::UserError(UserError::InvalidVectorsMapType { - document_id: document_id(), - value, - })); - }; - Some(value) - } else { - None - }) + let mut results = 
Vec::new(); + + for EmbedderVectorExtractor { + embedder_name, + embedder, + prompt: _, + manual_vectors_writer, + prompts_writer, + remove_vectors_writer, + } in extractors + { + results.push(ExtractedVectorPoints { + // docid, _index -> KvWriterDelAdd -> Vector + manual_vectors: writer_into_reader(manual_vectors_writer)?, + // docid -> () + remove_vectors: writer_into_reader(remove_vectors_writer)?, + // docid -> prompt + prompts: writer_into_reader(prompts_writer)?, + + embedder, + embedder_name, + }) + } + + Ok(results) } /// Computes the diff between both Del and Add numbers and @@ -270,14 +295,14 @@ fn push_vectors_diff( manual_vectors_writer: &mut Writer>, key_buffer: &mut Vec, delta: VectorStateDelta, - settings_diff: &InnerIndexSettingsDiff, + reindex_vectors: bool, ) -> Result<()> { puffin::profile_function!(); let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = delta.into_values(); if must_remove // TODO: the below condition works because we erase the vec database when a embedding setting changes. // When vector pipeline will be optimized, this should be removed. - && !settings_diff.reindex_vectors() + && !reindex_vectors { key_buffer.truncate(TRUNCATE_SIZE); remove_vectors_writer.insert(&key_buffer, [])?; @@ -308,7 +333,7 @@ fn push_vectors_diff( EitherOrBoth::Left(vector) => { // TODO: the below condition works because we erase the vec database when a embedding setting changes. // When vector pipeline will be optimized, this should be removed. - if !settings_diff.reindex_vectors() { + if !reindex_vectors { // We insert only the Del part of the Obkv to inform // that we only want to remove all those vectors. let mut obkv = KvWriterDelAdd::memory(); @@ -336,26 +361,6 @@ fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering { a.iter().copied().map(OrderedFloat).cmp(b.iter().copied().map(OrderedFloat)) } -/// Extracts the vectors from a JSON value. 
-fn extract_vectors( - value: Value, - document_id: impl Fn() -> Value, - name: &str, -) -> Result>> { - // FIXME: ugly clone of the vectors here - match serde_json::from_value(value.clone()) { - Ok(vectors) => { - Ok(VectorOrArrayOfVectors::into_array_of_vectors(vectors).unwrap_or_default()) - } - Err(_) => Err(UserError::InvalidVectorsType { - document_id: document_id(), - value, - subfield: name.to_owned(), - } - .into()), - } -} - #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] pub fn extract_embeddings( // docid, prompt diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 573e0898a..0ea0fcc5c 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -226,27 +226,31 @@ fn send_original_documents_data( let original_documents_chunk = original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?; - let documents_chunk_cloned = original_documents_chunk.clone(); - let lmdb_writer_sx_cloned = lmdb_writer_sx.clone(); - let request_threads = ThreadPoolNoAbortBuilder::new() .num_threads(crate::vector::REQUEST_PARALLELISM) .thread_name(|index| format!("embedding-request-{index}")) .build()?; - if settings_diff.reindex_vectors() || !settings_diff.settings_update_only() { + let index_vectors = (settings_diff.reindex_vectors() || !settings_diff.settings_update_only()) + // no point in indexing vectors without embedders + && (!settings_diff.new.embedding_configs.inner_as_ref().is_empty()); + + if index_vectors { let settings_diff = settings_diff.clone(); + + let original_documents_chunk = original_documents_chunk.clone(); + let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - for (name, (embedder, prompt)) in settings_diff.new.embedding_configs.clone() { - let result = extract_vector_points( - documents_chunk_cloned.clone(), - indexer, - &settings_diff, - &prompt, - &name, - ); - match result 
{ - Ok(ExtractedVectorPoints { manual_vectors, remove_vectors, prompts }) => { + match extract_vector_points(original_documents_chunk.clone(), indexer, settings_diff) { + Ok(extracted_vectors) => { + for ExtractedVectorPoints { + manual_vectors, + remove_vectors, + prompts, + embedder_name, + embedder, + } in extracted_vectors + { let embeddings = match extract_embeddings( prompts, indexer, @@ -255,28 +259,26 @@ fn send_original_documents_data( ) { Ok(results) => Some(results), Err(error) => { - let _ = lmdb_writer_sx_cloned.send(Err(error)); + let _ = lmdb_writer_sx.send(Err(error)); None } }; - if !(remove_vectors.is_empty() && manual_vectors.is_empty() && embeddings.as_ref().map_or(true, |e| e.is_empty())) { - let _ = lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints { + let _ = lmdb_writer_sx.send(Ok(TypedChunk::VectorPoints { remove_vectors, embeddings, expected_dimension: embedder.dimensions(), manual_vectors, - embedder_name: name, + embedder_name, })); } } - - Err(error) => { - let _ = lmdb_writer_sx_cloned.send(Err(error)); - } + } + Err(error) => { + let _ = lmdb_writer_sx.send(Err(error)); } } }); diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index d3d05a1c1..1922bb389 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -148,6 +148,10 @@ impl EmbeddingConfigs { self.get(self.get_default_embedder_name()) } + pub fn inner_as_ref(&self) -> &HashMap, Arc)> { + &self.0 + } + /// Get the name of the default embedder configuration. 
/// /// The default embedder is determined as follows: From 02714ef5edb87f9fb371efeb2208526c2dbdb284 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 14 May 2024 11:43:16 +0200 Subject: [PATCH 31/56] Add vectors from vector DB in dump --- index-scheduler/src/batch.rs | 54 ++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 582497c15..40398dc37 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -31,6 +31,7 @@ use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::update::{ IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings, }; +use meilisearch_types::milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; use meilisearch_types::milli::{self, Filter}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; @@ -916,8 +917,57 @@ impl IndexScheduler { if self.must_stop_processing.get() { return Err(Error::AbortedTask); } - let (_id, doc) = ret?; - let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?; + + let (id, doc) = ret?; + + let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?; + + 'inject_vectors: { + let embeddings = index.embeddings(&rtxn, id)?; + + if embeddings.is_empty() { + break 'inject_vectors; + } + + let vectors = document + .entry(RESERVED_VECTORS_FIELD_NAME.to_owned()) + .or_insert(serde_json::Value::Object(Default::default())); + + let serde_json::Value::Object(vectors) = vectors else { + return Err(milli::Error::UserError( + milli::UserError::InvalidVectorsMapType { + document_id: { + if let Ok(Some(Ok(index))) = index + .external_id_of(&rtxn, std::iter::once(id)) + .map(|it| it.into_iter().next()) + { + index + } else { + format!("internal docid={id}") + } + }, + value: 
vectors.clone(), + }, + ) + .into()); + }; + + /// some tests to consider: + /// + /// - dump, then import, then change a document with autogenerated vectors + for (embedder_name, embeddings) in embeddings { + // don't change the entry if it already exists, because it was user-provided + vectors.entry(embedder_name).or_insert_with(|| { + + let embeddings = milli::vector::parsed_vectors::ExplicitVectors { + embeddings: milli::vector::parsed_vectors::VectorOrArrayOfVectors::from_array_of_vectors(embeddings), + user_provided: false, + }; + serde_json::to_value(embeddings).unwrap() + }); + } + } + index_dumper.push_document(&document)?; } From 2f7a8a4efb9248855a272aa7ec9e8d46a290a8f8 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 14 May 2024 11:46:04 +0200 Subject: [PATCH 32/56] Don't write vectors that weren't autogenerated in document DB --- .../src/update/index_documents/typed_chunk.rs | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index e0de2d5a1..8eb9ead28 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -193,6 +193,10 @@ pub(crate) fn write_typed_chunk_into_index( let span = tracing::trace_span!(target: "indexing::write_db", "documents"); let _entered = span.enter(); + let fields_ids_map = index.fields_ids_map(wtxn)?; + let vectors_fid = + fields_ids_map.id(crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); + let mut builder = MergerBuilder::new(keep_latest_obkv as MergeFn); for typed_chunk in typed_chunks { let TypedChunk::Documents(chunk) = typed_chunk else { @@ -206,6 +210,8 @@ pub(crate) fn write_typed_chunk_into_index( let mut docids = index.documents_ids(wtxn)?; let mut iter = merger.into_stream_merger_iter()?; + + let mut vectors_buffer = Vec::new(); while let Some((key, reader)) = iter.next()? 
{ let mut writer: KvWriter<_, FieldId> = KvWriter::memory(); let reader: KvReader = KvReader::new(reader); @@ -219,6 +225,24 @@ pub(crate) fn write_typed_chunk_into_index( let del_add_reader = KvReaderDelAdd::new(value); if let Some(addition) = del_add_reader.get(DelAdd::Addition) { + let addition = match vectors_fid { + // for the "_vectors" field, only keep vectors that are marked as userProvided + Some(vectors_fid) if vectors_fid == field_id => 'vectors: { + vectors_buffer.clear(); + let Ok(mut vectors) = + crate::vector::parsed_vectors::ParsedVectors::from_bytes( + addition, + ) + else { + break 'vectors addition; + }; + vectors.retain_user_provided_vectors(); + serde_json::to_writer(&mut vectors_buffer, &vectors.0) + .map_err(InternalError::SerdeJson)?; + &vectors_buffer + } + _ => addition, + }; writer.insert(field_id, addition)?; } } From 0462ebbe582ee493d6d18d2ffdd6ac6a6761dcda Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 14 May 2024 11:51:27 +0200 Subject: [PATCH 33/56] Don't write an empty _vectors field --- milli/src/update/index_documents/typed_chunk.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 8eb9ead28..6f11dd585 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -234,16 +234,23 @@ pub(crate) fn write_typed_chunk_into_index( addition, ) else { - break 'vectors addition; + break 'vectors Some(addition); }; vectors.retain_user_provided_vectors(); - serde_json::to_writer(&mut vectors_buffer, &vectors.0) + let crate::vector::parsed_vectors::ParsedVectors(vectors) = vectors; + if vectors.is_empty() { + break 'vectors None; + } + + serde_json::to_writer(&mut vectors_buffer, &vectors) .map_err(InternalError::SerdeJson)?; - &vectors_buffer + Some(vectors_buffer.as_slice()) } - _ => addition, + _ => Some(addition), }; - 
writer.insert(field_id, addition)?; + if let Some(addition) = addition { + writer.insert(field_id, addition)?; + } } } From d05d49ffd8d4c666de5a5528145d313d0fcb2430 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 14 May 2024 14:14:02 +0200 Subject: [PATCH 34/56] Fix tests --- meilisearch/tests/search/mod.rs | 54 +++++++++---------- meilisearch/tests/search/multi.rs | 30 +++++------ .../src/update/index_documents/typed_chunk.rs | 2 + 3 files changed, 44 insertions(+), 42 deletions(-) diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index f601e2b03..771eee21b 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -895,9 +895,9 @@ async fn test_score_details() { "id": "166428", "_vectors": { "manual": [ - -100, - 231, - 32 + -100.0, + 231.0, + 32.0 ] }, "_rankingScoreDetails": { @@ -1096,9 +1096,9 @@ async fn experimental_feature_vector_store() { "id": "287947", "_vectors": { "manual": [ - 1, - 2, - 3 + 1.0, + 2.0, + 3.0 ] }, "_rankingScore": 1.0 @@ -1108,9 +1108,9 @@ async fn experimental_feature_vector_store() { "id": "299537", "_vectors": { "manual": [ - 1, - 2, - 54 + 1.0, + 2.0, + 54.0 ] }, "_rankingScore": 0.9129111766815186 @@ -1120,9 +1120,9 @@ async fn experimental_feature_vector_store() { "id": "450465", "_vectors": { "manual": [ - -100, - 340, - 90 + -100.0, + 340.0, + 90.0 ] }, "_rankingScore": 0.8106412887573242 @@ -1132,9 +1132,9 @@ async fn experimental_feature_vector_store() { "id": "166428", "_vectors": { "manual": [ - -100, - 231, - 32 + -100.0, + 231.0, + 32.0 ] }, "_rankingScore": 0.7412010431289673 @@ -1144,9 +1144,9 @@ async fn experimental_feature_vector_store() { "id": "522681", "_vectors": { "manual": [ - 10, - -23, - 32 + 10.0, + -23.0, + 32.0 ] }, "_rankingScore": 0.6972063183784485 @@ -1405,9 +1405,9 @@ async fn simple_search_with_strange_synonyms() { "id": "166428", "_vectors": { "manual": [ - -100, - 231, - 32 + -100.0, + 231.0, + 32.0 ] } } @@ -1426,9 +1426,9 @@ async 
fn simple_search_with_strange_synonyms() { "id": "166428", "_vectors": { "manual": [ - -100, - 231, - 32 + -100.0, + 231.0, + 32.0 ] } } @@ -1447,9 +1447,9 @@ async fn simple_search_with_strange_synonyms() { "id": "166428", "_vectors": { "manual": [ - -100, - 231, - 32 + -100.0, + 231.0, + 32.0 ] } } diff --git a/meilisearch/tests/search/multi.rs b/meilisearch/tests/search/multi.rs index aeec1bad4..b5cf8f476 100644 --- a/meilisearch/tests/search/multi.rs +++ b/meilisearch/tests/search/multi.rs @@ -75,9 +75,9 @@ async fn simple_search_single_index() { "id": "450465", "_vectors": { "manual": [ - -100, - 340, - 90 + -100.0, + 340.0, + 90.0 ] } } @@ -96,9 +96,9 @@ async fn simple_search_single_index() { "id": "299537", "_vectors": { "manual": [ - 1, - 2, - 54 + 1.0, + 2.0, + 54.0 ] } } @@ -194,9 +194,9 @@ async fn simple_search_two_indexes() { "id": "450465", "_vectors": { "manual": [ - -100, - 340, - 90 + -100.0, + 340.0, + 90.0 ] } } @@ -227,9 +227,9 @@ async fn simple_search_two_indexes() { "cattos": "pésti", "_vectors": { "manual": [ - 1, - 2, - 3 + 1.0, + 2.0, + 3.0 ] } }, @@ -249,9 +249,9 @@ async fn simple_search_two_indexes() { ], "_vectors": { "manual": [ - 1, - 2, - 54 + 1.0, + 2.0, + 54.0 ] } } diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 6f11dd585..6615a4bc3 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -234,11 +234,13 @@ pub(crate) fn write_typed_chunk_into_index( addition, ) else { + // if the `_vectors` field cannot be parsed as map of vectors, just write it as-is break 'vectors Some(addition); }; vectors.retain_user_provided_vectors(); let crate::vector::parsed_vectors::ParsedVectors(vectors) = vectors; if vectors.is_empty() { + // skip writing empty `_vectors` map break 'vectors None; } From 30cf972987327bfb349270bb371875d9180343be Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 16 May 2024 18:11:16 
+0200 Subject: [PATCH 35/56] Add test with a dump --- dump/src/reader/mod.rs | 134 +++ ...__test__import_dump_v6_with_vectors-5.snap | 783 +++++++++++++++++ ...__test__import_dump_v6_with_vectors-6.snap | 786 ++++++++++++++++++ ...__test__import_dump_v6_with_vectors-7.snap | 785 +++++++++++++++++ ...__test__import_dump_v6_with_vectors-8.snap | 780 +++++++++++++++++ dump/tests/assets/v6-with-vectors.dump | Bin 0 -> 17539 bytes 6 files changed, 3268 insertions(+) create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap create mode 100644 dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap create mode 100644 dump/tests/assets/v6-with-vectors.dump diff --git a/dump/src/reader/mod.rs b/dump/src/reader/mod.rs index 5bbf4ec4d..2b3732164 100644 --- a/dump/src/reader/mod.rs +++ b/dump/src/reader/mod.rs @@ -197,6 +197,140 @@ pub(crate) mod test { use super::*; use crate::reader::v6::RuntimeTogglableFeatures; + #[test] + fn import_dump_v6_with_vectors() { + // dump containing two indexes + // + // "vector", configured with an embedder + // contains: + // - one document with an overridden vector, + // - one document with a natural vector + // - one document with a _vectors map containing one additional embedder name and a natural vector + // - one document with a _vectors map containing one additional embedder name and an overridden vector + // + // "novector", no embedder + // contains: + // - a document without vector + // - a document with a random _vectors field + let dump = File::open("tests/assets/v6-with-vectors.dump").unwrap(); + let mut dump = DumpReader::open(dump).unwrap(); + + // top level infos + insta::assert_display_snapshot!(dump.date().unwrap(), @"2024-05-16 15:51:34.151044 +00:00:00"); + 
insta::assert_debug_snapshot!(dump.instance_uid().unwrap(), @"None"); + + // tasks + let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap(); + let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip(); + meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"278f63325ef06ca04d01df98d8207b94"); + assert_eq!(update_files.len(), 10); + assert!(update_files[0].is_none()); // the dump creation + assert!(update_files[1].is_none()); + assert!(update_files[2].is_none()); + assert!(update_files[3].is_none()); + assert!(update_files[4].is_none()); + assert!(update_files[5].is_none()); + assert!(update_files[6].is_none()); + assert!(update_files[7].is_none()); + assert!(update_files[8].is_none()); + assert!(update_files[9].is_none()); + + // indexes + let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap(); + // the indexes are not ordered in any way by default + indexes.sort_by_key(|index| index.metadata().uid.to_string()); + + let mut vector_index = indexes.pop().unwrap(); + let mut novector_index = indexes.pop().unwrap(); + assert!(indexes.is_empty()); + + // vector + + insta::assert_json_snapshot!(vector_index.metadata(), @r###" + { + "uid": "vector", + "primaryKey": "id", + "createdAt": "2024-05-16T15:33:17.240962Z", + "updatedAt": "2024-05-16T15:40:55.723052Z" + } + "###); + + { + let documents: Result<Vec<_>> = vector_index.documents().unwrap().collect(); + let mut documents = documents.unwrap(); + assert_eq!(documents.len(), 4); + + documents.sort_by_key(|doc| doc.get("id").unwrap().to_string()); + + { + let document = documents.pop().unwrap(); + insta::assert_json_snapshot!(document); + } + + { + let document = documents.pop().unwrap(); + insta::assert_json_snapshot!(document); + } + + { + let document = documents.pop().unwrap(); + insta::assert_json_snapshot!(document); + } + + { + let document = documents.pop().unwrap(); + insta::assert_json_snapshot!(document); + } + } + + // novector + + 
insta::assert_json_snapshot!(novector_index.metadata(), @r###" + { + "uid": "novector", + "primaryKey": "id", + "createdAt": "2024-05-16T15:33:03.568055Z", + "updatedAt": "2024-05-16T15:33:07.530217Z" + } + "###); + + insta::assert_json_snapshot!(novector_index.settings().unwrap().embedders, @"null"); + + { + let documents: Result> = novector_index.documents().unwrap().collect(); + let mut documents = documents.unwrap(); + assert_eq!(documents.len(), 2); + + documents.sort_by_key(|doc| doc.get("id").unwrap().to_string()); + + { + let document = documents.pop().unwrap(); + insta::assert_json_snapshot!(document, @r###" + { + "id": "e1", + "other": "random1", + "_vectors": "toto" + } + "###); + } + + { + let document = documents.pop().unwrap(); + insta::assert_json_snapshot!(document, @r###" + { + "id": "e0", + "other": "random0" + } + "###); + } + } + + assert_eq!( + dump.features().unwrap().unwrap(), + RuntimeTogglableFeatures { vector_store: true, ..Default::default() } + ); + } + #[test] fn import_dump_v6_experimental() { let dump = File::open("tests/assets/v6-with-experimental.dump").unwrap(); diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap new file mode 100644 index 000000000..43bdb9726 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-5.snap @@ -0,0 +1,783 @@ +--- +source: dump/src/reader/mod.rs +expression: document +--- +{ + "id": "e3", + "desc": "overriden vector + map", + "_vectors": { + "default": [ + 0.2, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, 
+ 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ], + "toto": [ + 0.1 + ] + } +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap new file mode 100644 index 000000000..0aad0ea97 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-6.snap @@ -0,0 +1,786 @@ +--- +source: dump/src/reader/mod.rs +expression: document +--- +{ + "id": "e2", + "desc": "natural vector + map", + "_vectors": { + "toto": [], + "default": { + "embeddings": [ + [ + -0.05189208313822746, + -0.9273212552070618, + 0.1443813145160675, + 0.0932632014155388, + 0.2665371894836426, + 0.36266782879829407, + 0.6402910947799683, + 0.32014018297195435, + 0.030915971845388412, + -0.9312191605567932, + -0.3718109726905823, + -0.2700554132461548, + -1.1014580726623535, + 0.9154956936836244, + -0.3406888246536255, + 1.0077725648880005, + 0.6577560901641846, + -0.3955195546150207, + -0.4148270785808563, + 0.1855088472366333, + 
0.5062315464019775, + -0.3632686734199524, + -0.2277890294790268, + 0.2560805082321167, + -0.3853609561920166, + -0.1604762226343155, + -0.13947471976280212, + -0.20147813856601715, + -0.4466346800327301, + -0.3761846721172333, + 0.1443382054567337, + 0.18205296993255615, + 0.49359792470932007, + -0.22538000345230105, + -0.4996317625045776, + -0.22734887897968292, + -0.6034309267997742, + -0.7857939600944519, + -0.34923747181892395, + -0.3466345965862274, + 0.21176661550998688, + -0.5101462006568909, + -0.3403083384037018, + 0.000315118464641273, + 0.236465722322464, + -0.10246097296476364, + -1.3013339042663574, + 0.3419138789176941, + -0.32963496446609497, + -0.0901619717478752, + -0.5426247119903564, + 0.22656650841236117, + -0.44758284091949463, + 0.14151698350906372, + -0.1089438870549202, + 0.5500766634941101, + -0.670711100101471, + -0.6227269768714905, + 0.3894464075565338, + -0.27609574794769287, + 0.7028202414512634, + -0.19697771966457367, + 0.328511506319046, + 0.5063360929489136, + 0.4065195322036743, + 0.2614171802997589, + -0.30274391174316406, + 1.0393824577331543, + -0.7742937207221985, + -0.7874112129211426, + -0.6749666929244995, + 0.5190866589546204, + 0.004123548045754433, + -0.28312963247299194, + -0.038731709122657776, + -1.0142987966537476, + -0.09519586712121964, + 0.8755272626876831, + 0.4876938760280609, + 0.7811151742935181, + 0.85174959897995, + 0.11826585978269576, + 0.5373436808586121, + 0.3649002015590668, + 0.19064077734947205, + -0.00287026260048151, + -0.7305403351783752, + -0.015206154435873032, + -0.7899249196052551, + 0.19407285749912265, + 0.08596625179052353, + -0.28976231813430786, + -0.1525907665491104, + 0.3798313438892365, + 0.050306469202041626, + -0.5697937607765198, + 0.4219021201133728, + 0.276252806186676, + 0.1559903472661972, + 0.10030482709407806, + -0.4043720066547394, + -0.1969818025827408, + 0.5739826560020447, + 0.2116064727306366, + -1.4620544910430908, + -0.7802462577819824, + -0.24739810824394223, + 
-0.09791352599859238, + -0.4413802027702331, + 0.21549351513385773, + -0.9520436525344848, + -0.08762510865926743, + 0.08154498040676117, + -0.6154940724372864, + -1.01079523563385, + 0.885427713394165, + 0.6967288851737976, + 0.27186504006385803, + -0.43194177746772766, + -0.11248451471328735, + 0.7576630711555481, + 0.4998855590820313, + 0.0264343973249197, + 0.9872855544090272, + 0.5634694695472717, + 0.053698331117630005, + 0.19410227239131927, + 0.3570743501186371, + -0.23670297861099243, + -0.9114483594894408, + 0.07884842902421951, + 0.7318344116210938, + 0.44630110263824463, + 0.08745364099740982, + -0.347101628780365, + -0.4314247667789459, + -0.5060274004936218, + 0.003706763498485088, + 0.44320008158683777, + -0.00788921769708395, + -0.1368623524904251, + -0.17391923069953918, + 0.14473655819892883, + 0.10927865654230118, + 0.6974599361419678, + 0.005052129738032818, + -0.016953065991401672, + -0.1256176233291626, + -0.036742497235536575, + 0.5591985583305359, + -0.37619709968566895, + 0.22429119050502777, + 0.5403043031692505, + -0.8603790998458862, + -0.3456307053565979, + 0.9292937517166138, + 0.5074859261512756, + 0.6310645937919617, + -0.3091641068458557, + 0.46902573108673096, + 0.7891915440559387, + 0.4499550759792328, + 0.2744995653629303, + 0.2712305784225464, + -0.04349074140191078, + -0.3638863265514374, + 0.7839881777763367, + 0.7352104783058167, + -0.19457511603832245, + -0.5957832932472229, + -0.43704694509506226, + -1.084769368171692, + 0.4904985725879669, + 0.5385226011276245, + 0.1891629993915558, + 0.12338479608297348, + 0.8315675258636475, + -0.07830192148685455, + 1.0916285514831543, + -0.28066861629486084, + -1.3585069179534912, + 0.5203898549079895, + 0.08678033947944641, + -0.2566044330596924, + 0.09484415501356123, + -0.0180208683013916, + 1.0264745950698853, + -0.023572135716676712, + 0.5864979028701782, + 0.7625196576118469, + -0.2543414533138275, + -0.8877770900726318, + 0.7611982822418213, + -0.06220436468720436, + 
0.937336564064026, + 0.2704363465309143, + -0.37733694911003113, + 0.5076137781143188, + -0.30641937255859375, + 0.6252772808074951, + -0.0823579877614975, + -0.03736555948853493, + 0.4131673276424408, + -0.6514252424240112, + 0.12918265163898468, + -0.4483584463596344, + 0.6750786304473877, + -0.37008383870124817, + -0.02324833907186985, + 0.38027650117874146, + -0.26374951004981995, + 0.4346931278705597, + 0.42882832884788513, + -0.48798441886901855, + 1.1882442235946655, + 0.5132288336753845, + 0.5284568667411804, + -0.03538886830210686, + 0.29620853066444397, + -1.0683696269989014, + 0.25936177372932434, + 0.10404160618782043, + -0.25796034932136536, + 0.027896970510482788, + -0.09225251525640488, + 1.4811025857925415, + 0.641173779964447, + -0.13838383555412292, + -0.3437179923057556, + 0.5667019486427307, + -0.5400741696357727, + 0.31090837717056274, + 0.6470608115196228, + -0.3747067153453827, + -0.7364534735679626, + -0.07431528717279434, + 0.5173454880714417, + -0.6578747034072876, + 0.7107478976249695, + -0.7918999791145325, + -0.0648345872759819, + 0.609937846660614, + -0.7329513430595398, + 0.9741371870040894, + 0.17912346124649048, + -0.02658769302070141, + 0.5162150859832764, + -0.3978803157806397, + -0.7833885550498962, + -0.6497276425361633, + -0.3898126780986786, + -0.0952848568558693, + 0.2663288116455078, + -0.1604052186012268, + 0.373076468706131, + -0.8357769250869751, + -0.05217683315277099, + -0.2680160701274872, + 0.8389158248901367, + 0.6833611130714417, + -0.6712407469749451, + 0.7406917214393616, + -0.44522786140441895, + -0.34645363688468933, + -0.27384576201438904, + -0.9878405928611756, + -0.8166060447692871, + 0.06268279999494553, + 0.38567957282066345, + -0.3274703919887543, + 0.5296315550804138, + -0.11810623109340668, + 0.23029841482639313, + 0.08616159111261368, + -0.2195747196674347, + 0.09430307894945145, + 0.4057176411151886, + 0.4892159104347229, + -0.1636916548013687, + -0.6071445345878601, + 0.41256585717201233, + 
0.622254490852356, + -0.41223976016044617, + -0.6686707139015198, + -0.7474371790885925, + -0.8509522080421448, + -0.16754287481307983, + -0.9078601002693176, + -0.29653599858283997, + -0.5020652413368225, + 0.4692700505256653, + 0.01281109917908907, + -0.16071580350399017, + 0.03388889133930206, + -0.020511148497462273, + 0.5027827024459839, + -0.20729811489582065, + 0.48107290267944336, + 0.33669769763946533, + -0.5275911688804626, + 0.48271527886390686, + 0.2738940715789795, + -0.033152539283037186, + -0.13629786670207977, + -0.05965912342071533, + -0.26200807094573975, + 0.04002794995903969, + -0.34095603227615356, + -3.986898899078369, + -0.46819332242012024, + -0.422744482755661, + -0.169097900390625, + 0.6008929014205933, + 0.058016058057546616, + -0.11401277780532836, + -0.3077819049358368, + -0.09595538675785063, + 0.6723822355270386, + 0.19367831945419312, + 0.28304359316825867, + 0.1609862744808197, + 0.7567598819732666, + 0.6889985799789429, + 0.06907720118761063, + -0.04188092052936554, + -0.7434936165809631, + 0.13321782648563385, + 0.8456063270568848, + -0.10364038497209548, + -0.45084846019744873, + -0.4758241474628449, + 0.43882066011428833, + -0.6432598829269409, + 0.7217311859130859, + -0.24189773201942444, + 0.12737572193145752, + -1.1008601188659668, + -0.3305315673351288, + 0.14614742994308472, + -0.7819333076477051, + 0.5287120342254639, + -0.055538054555654526, + 0.1877404749393463, + -0.6907662153244019, + 0.5616975426673889, + -0.4611121714115143, + -0.26109233498573303, + -0.12898315489292145, + -0.3724522292613983, + -0.7191406488418579, + -0.4425233602523804, + -0.644108235836029, + 0.8424481153488159, + 0.17532426118850708, + -0.5121750235557556, + -0.6467239260673523, + -0.0008507720194756985, + 0.7866212129592896, + -0.02644744887948036, + -0.005045140627771616, + 0.015782782807946205, + 0.16334445774555206, + -0.1913367658853531, + -0.13697923719882965, + -0.6684983372688293, + 0.18346354365348816, + -0.341105580329895, + 
0.5427411198616028, + 0.3779832422733307, + -0.6778115034103394, + -0.2931850254535675, + -0.8805161714553833, + -0.4212774932384491, + -0.5368952751159668, + -1.3937891721725464, + -1.225494146347046, + 0.4276703894138336, + 1.1205668449401855, + -0.6005299687385559, + 0.15732505917549133, + -0.3914784789085388, + -1.357046604156494, + -0.4707142114639282, + -0.1497287154197693, + -0.25035548210144043, + -0.34328439831733704, + 0.39083412289619446, + 0.1623048633337021, + -0.9275814294815063, + -0.6430015563964844, + 0.2973862886428833, + 0.5580436587333679, + -0.6232585310935974, + -0.6611042022705078, + 0.4015969038009643, + -1.0232892036437988, + -0.2585645020008087, + -0.5431421399116516, + 0.5021264553070068, + -0.48601630330085754, + -0.010242084041237833, + 0.5862035155296326, + 0.7316920161247253, + 0.4036808013916016, + 0.4269520044326782, + -0.705938458442688, + 0.7747307419776917, + 0.10164368897676468, + 0.7887958884239197, + -0.9612497091293336, + 0.12755516171455383, + 0.06812842190265656, + -0.022603651508688927, + 0.14722754061222076, + -0.5588505268096924, + -0.20689940452575684, + 0.3557641804218292, + -0.6812759637832642, + 0.2860803008079529, + -0.38954633474349976, + 0.1759403496980667, + -0.5678874850273132, + -0.1692986786365509, + -0.14578519761562347, + 0.5711379051208496, + 1.0208125114440918, + 0.7759483456611633, + -0.372348427772522, + -0.5460885763168335, + 0.7190321683883667, + -0.6914990544319153, + 0.13365162909030914, + -0.4854792356491089, + 0.4054908752441406, + 0.4502798914909363, + -0.3041122555732727, + -0.06726965308189392, + -0.05570871382951737, + -0.0455719493329525, + 0.4785125255584717, + 0.8867972493171692, + 0.4107886850833893, + 0.6121342182159424, + -0.20477132499217987, + -0.5598517656326294, + -0.6443566679954529, + -0.5905212759971619, + -0.5571200251579285, + 0.17573799192905426, + -0.28621870279312134, + 0.1685224026441574, + 0.09719007462263109, + -0.04223639518022537, + -0.28623101115226746, + 
-0.1449810117483139, + -0.3789580464363098, + -0.5227636098861694, + -0.049728814512491226, + 0.7849089503288269, + 0.16792525351047516, + 0.9849340915679932, + -0.6559549570083618, + 0.35723909735679626, + -0.6822739243507385, + 1.2873116731643677, + 0.19993330538272855, + 0.03512010723352432, + -0.6972134113311768, + 0.18453484773635864, + -0.2437680810689926, + 0.2156416028738022, + 0.5230382680892944, + 0.22020135819911957, + 0.8314080238342285, + 0.15627102553844452, + -0.7330264449119568, + 0.3888184726238251, + -0.22034703195095065, + 0.5457669496536255, + -0.48084837198257446, + -0.45576658844947815, + -0.09287727624177931, + -0.06968110054731369, + 0.35125672817230225, + -0.4278119504451752, + 0.2038476765155792, + 0.11392722278833388, + 0.9433983564376832, + -0.4097744226455689, + 0.035297419875860214, + -0.4274404048919678, + -0.25100165605545044, + 1.0943366289138794, + -0.07634022831916809, + -0.2925529479980469, + -0.7512530088424683, + 0.2649727463722229, + -0.4078235328197479, + -0.3372223973274231, + 0.05190162733197212, + 0.005654910113662481, + -0.0001571219472680241, + -0.35445958375930786, + -0.7837416529655457, + 0.1500556766986847, + 0.4383024573326111, + 0.6099548935890198, + 0.05951934307813645, + -0.21325334906578064, + 0.0199207104742527, + -0.22704418003559113, + -0.6481077671051025, + 0.37442275881767273, + -1.015955924987793, + 0.38637226819992065, + -0.06489371508359909, + -0.494120329618454, + 0.3469836115837097, + 0.15402406454086304, + -0.7660972476005554, + -0.7053225040435791, + -0.25964751839637756, + 0.014004424214363098, + -0.2860170006752014, + -0.17565494775772095, + -0.45117494463920593, + -0.0031954257283359766, + 0.09676837921142578, + -0.514464259147644, + 0.41698193550109863, + -0.21642713248729703, + -0.5398141145706177, + -0.3647628426551819, + 0.37005379796028137, + 0.239425927400589, + -0.08833975344896317, + 0.934946596622467, + -0.48340797424316406, + 0.6241437792778015, + -0.7253676652908325, + 
-0.04303571209311485, + 1.1125205755233765, + -0.15692919492721558, + -0.2914651036262512, + -0.5117168426513672, + 0.21365483105182648, + 0.4924402534961701, + 0.5269662141799927, + 0.0352792888879776, + -0.149167999625206, + -0.6019760370254517, + 0.08245442807674408, + 0.4900692105293274, + 0.518824577331543, + -0.00005570516441366635, + -0.553304135799408, + 0.22217543423175812, + 0.5047767758369446, + 0.135724738240242, + 1.1511540412902832, + -0.3541218340396881, + -0.9712511897087096, + 0.8353699445724487, + -0.39227569103240967, + -0.9117669463157654, + -0.26349931955337524, + 0.05597023293375969, + 0.20695461332798004, + 0.3178807199001312, + 1.0663238763809204, + 0.5062212347984314, + 0.7288597822189331, + 0.09899299591779707, + 0.553720235824585, + 0.675009548664093, + -0.20067055523395536, + 0.3138423264026642, + -0.6886593103408813, + -0.2910398542881012, + -1.3186300992965698, + -0.4684459865093231, + -0.095743365585804, + -0.1257995069026947, + -0.4858281314373016, + -0.4935407340526581, + -0.3266896903514862, + -0.3928797245025635, + -0.40803104639053345, + -0.9975396394729614, + 0.4229583740234375, + 0.37309643626213074, + 0.4431034922599793, + 0.30364808440208435, + -0.3765178918838501, + 0.5616499185562134, + 0.16904796659946442, + -0.7343707084655762, + 0.2560209631919861, + 0.6166825294494629, + 0.3200829327106476, + -0.4483652710914612, + 0.16224201023578644, + -0.31495288014411926, + -0.42713335156440735, + 0.7270734906196594, + 0.7049484848976135, + -0.0571461021900177, + 0.04477125033736229, + -0.6647796034812927, + 1.183672308921814, + 0.36199676990509033, + 0.046881116926670074, + 0.4515796303749085, + 0.9278061985969543, + 0.31471705436706543, + -0.7073333859443665, + -0.3443860113620758, + 0.5440067052841187, + -0.15020819008350372, + -0.541202962398529, + 0.5203295946121216, + 1.2192286252975464, + -0.9983593225479126, + -0.18758884072303772, + 0.2758221924304962, + -0.6511523723602295, + -0.1584404855966568, + -0.236241415143013, + 
0.2692437767982483, + -0.4941152036190033, + 0.4987454116344452, + -0.3331359028816223, + 0.3163745701313019, + 0.745529294013977, + -0.2905873656272888, + 0.13602906465530396, + 0.4679684340953827, + 1.0555986166000366, + 1.075700044631958, + 0.5368486046791077, + -0.5118206739425659, + 0.8668332099914551, + -0.5726966857910156, + -0.7811751961708069, + 0.1938626915216446, + -0.1929349899291992, + 0.1757766306400299, + 0.6384295225143433, + 0.26462844014167786, + 0.9542630314826964, + 0.19313029944896695, + 1.264248013496399, + -0.6304428577423096, + 0.0487106591463089, + -0.16211535036563873, + -0.7894763350486755, + 0.3582514822483063, + -0.04153040423989296, + 0.635784387588501, + 0.6554391980171204, + -0.47010496258735657, + -0.8302040696144104, + -0.1350124627351761, + 0.2568812072277069, + 0.13614831864833832, + -0.2563649117946625, + -1.0434694290161133, + 0.3232482671737671, + 0.47882452607154846, + 0.4298652410507202, + 1.0563770532608032, + -0.28917592763900757, + -0.8533256649971008, + 0.10648339986801147, + 0.6376127004623413, + -0.20832888782024384, + 0.2370245456695557, + 0.0018312990432605147, + -0.2034837007522583, + 0.01051164511591196, + -1.105310082435608, + 0.29724350571632385, + 0.15604574978351593, + 0.1973688006401062, + 0.44394731521606445, + 0.3974513411521912, + -0.13625948131084442, + 0.9571986198425292, + 0.2257384955883026, + 0.2323588728904724, + -0.5583669543266296, + -0.7854922413825989, + 0.1647188365459442, + -1.6098142862319946, + 0.318587988615036, + -0.13399995863437653, + -0.2172701060771942, + -0.767514705657959, + -0.5813586711883545, + -0.3195130527019501, + -0.04894036799669266, + 0.2929930090904236, + -0.8213384747505188, + 0.07181350141763687, + 0.7469993829727173, + 0.6407455801963806, + 0.16365697979927063, + 0.7870153188705444, + 0.6524736881256104, + 0.6399973630905151, + -0.04992736503481865, + -0.03959266096353531, + -0.2512352466583252, + 0.8448855876922607, + -0.1422702670097351, + 0.1216789186000824, + 
-1.2647287845611572, + 0.5931149125099182, + 0.7186052203178406, + -0.06118432432413101, + -1.1942816972732544, + -0.17677085101604462, + 0.31543800234794617, + -0.32252824306488037, + 0.8255583047866821, + -0.14529970288276672, + -0.2695446312427521, + -0.33378756046295166, + -0.1653425395488739, + 0.1454019844532013, + -0.3920115828514099, + 0.912214994430542, + -0.7279734015464783, + 0.7374742031097412, + 0.933980405330658, + 0.13429680466651917, + -0.514870285987854, + 0.3989711999893189, + -0.11613689363002776, + 0.4022413492202759, + -0.9990655779838562, + -0.33749932050704956, + -0.4334589838981629, + -1.376373291015625, + -0.2993924915790558, + -0.09454808384180068, + -0.01314175222069025, + -0.001090060803107917, + 0.2137461006641388, + 0.2938512861728668, + 0.17508235573768616, + 0.8260607123374939, + -0.7218498587608337, + 0.2414487451314926, + -0.47296759486198425, + -0.3002610504627228, + -1.238540768623352, + 0.08663805574178696, + 0.6805586218833923, + 0.5909030437469482, + -0.42807504534721375, + -0.22887496650218964, + 0.47537800669670105, + -1.0474627017974854, + 0.6338009238243103, + 0.06548397243022919, + 0.4971011281013489, + 1.3484878540039063 + ] + ], + "userProvided": false + } + } +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap new file mode 100644 index 000000000..f2a5e1d69 --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-7.snap @@ -0,0 +1,785 @@ +--- +source: dump/src/reader/mod.rs +expression: document +--- +{ + "id": "e1", + "desc": "natural vector", + "_vectors": { + "default": { + "embeddings": [ + [ + -0.2979458272457123, + -0.5288640856742859, + -0.019957859069108963, + -0.18495318293571472, + 0.7429973483085632, + 0.5238497257232666, + 0.432366281747818, + 0.32744166254997253, + 0.0020762972999364138, + -0.9507834911346436, + -0.35097137093544006, + 
0.08469701558351517, + -1.4176613092422483, + 0.4647577106952667, + -0.69340580701828, + 1.0372896194458008, + 0.3716741800308227, + 0.06031008064746857, + -0.6152024269104004, + 0.007914665155112743, + 0.7954924702644348, + -0.20773003995418549, + 0.09376765787601472, + 0.04508133605122566, + -0.2084471583366394, + -0.1518009901046753, + 0.018195509910583496, + -0.07044368237257004, + -0.18119366466999057, + -0.4480230510234833, + 0.3822529911994934, + 0.1911812424659729, + 0.4674372375011444, + 0.06963984668254852, + -0.09341949224472046, + 0.005675444379448891, + -0.6774799227714539, + -0.7066726684570313, + -0.39256376028060913, + 0.04005039855837822, + 0.2084812968969345, + -0.7872875928878784, + -0.8205880522727966, + 0.2919981777667999, + -0.06004738807678223, + -0.4907574355602264, + -1.5937862396240234, + 0.24249385297298431, + -0.14709846675395966, + -0.11860740929841997, + -0.8299489617347717, + 0.472964346408844, + -0.497518390417099, + -0.22205302119255063, + -0.4196169078350067, + 0.32697558403015137, + -0.360930860042572, + -0.9789686799049376, + 0.1887447088956833, + -0.403737336397171, + 0.18524253368377688, + 0.3768732249736786, + 0.3666233420372009, + 0.3511938452720642, + 0.6985810995101929, + 0.41721710562705994, + 0.09754953533411026, + 0.6204307079315186, + -1.0762996673583984, + -0.06263761967420578, + -0.7376511693000793, + 0.6849768161773682, + -0.1745152473449707, + -0.40449759364128113, + 0.20757411420345304, + -0.8424443006515503, + 0.330015629529953, + 0.3489064872264862, + 1.0954371690750122, + 0.8487558960914612, + 1.1076823472976685, + 0.61430823802948, + 0.4155903458595276, + 0.4111340939998626, + 0.05753209814429283, + -0.06429877132177353, + -0.765606164932251, + -0.41703930497169495, + -0.508820652961731, + 0.19859947264194489, + -0.16607828438282013, + -0.28112146258354187, + 0.11032675206661224, + 0.38809511065483093, + -0.36498191952705383, + -0.48671194911003113, + 0.6755134463310242, + 0.03958442434668541, + 
0.4478721618652344, + -0.10335399955511092, + -0.9546685814857484, + -0.6087718605995178, + 0.17498846352100372, + 0.08320838958024979, + -1.4478336572647097, + -0.605027437210083, + -0.5867993235588074, + -0.14711688458919525, + -0.5447602272033691, + -0.026259321719408035, + -0.6997418403625488, + -0.07349082082509995, + 0.10638900846242905, + -0.7133527398109436, + -0.9396815299987792, + 1.087092399597168, + 1.1885089874267578, + 0.4011896848678589, + -0.4089202582836151, + -0.10938972979784012, + 0.6726722121238708, + 0.24576938152313232, + -0.24247920513153076, + 1.1499971151351929, + 0.47813335061073303, + -0.05331678315997124, + 0.32338133454322815, + 0.4870913326740265, + -0.23144258558750153, + -1.2023426294326782, + 0.2349330335855484, + 1.080536961555481, + 0.29334118962287903, + 0.391574501991272, + -0.15818795561790466, + -0.2948290705680847, + -0.024689948186278343, + 0.06602869182825089, + 0.5937030911445618, + -0.047901444137096405, + -0.512734591960907, + -0.35780075192451477, + 0.28751692175865173, + 0.4298716187477112, + 0.9242428541183472, + -0.17208744585514069, + 0.11515070497989656, + -0.0335976779460907, + -0.3422986567020416, + 0.5344581604003906, + 0.19895796477794647, + 0.33001241087913513, + 0.6390730142593384, + -0.6074934005737305, + -0.2553696632385254, + 0.9644920229911804, + 0.2699219584465027, + 0.6403993368148804, + -0.6380003690719604, + -0.027310986071825027, + 0.638815701007843, + 0.27719101309776306, + -0.13553589582443237, + 0.750195324420929, + 0.1224869191646576, + -0.20613941550254825, + 0.8444448709487915, + 0.16200250387191772, + -0.24750925600528717, + -0.739950954914093, + -0.28443849086761475, + -1.176282525062561, + 0.516107976436615, + 0.3774825632572174, + 0.10906043648719788, + 0.07962015271186829, + 0.7384604215621948, + -0.051241904497146606, + 1.1730090379714966, + -0.4828610122203827, + -1.404372215270996, + 0.8811132311820984, + -0.3839482367038727, + 0.022516896948218346, + -0.0491158664226532, + 
-0.43027013540267944, + 1.2049334049224854, + -0.27309560775756836, + 0.6883630752563477, + 0.8264574408531189, + -0.5020735263824463, + -0.4874092042446137, + 0.6007202863693237, + -0.4965405762195587, + 1.1302915811538696, + 0.032572727650403976, + -0.3731859028339386, + 0.658271849155426, + -0.9023059010505676, + 0.7400162220001221, + 0.014550759457051754, + -0.19699542224407196, + 0.2319706380367279, + -0.789058268070221, + -0.14905710518360138, + -0.5826214551925659, + 0.207652747631073, + -0.4507439732551574, + -0.3163885474205017, + 0.3604124188423157, + -0.45119962096214294, + 0.3428427278995514, + 0.3005594313144684, + -0.36026081442832947, + 1.1014249324798584, + 0.40884315967559814, + 0.34991952776908875, + -0.1806638240814209, + 0.27440476417541504, + -0.7118373513221741, + 0.4645499587059021, + 0.214790478348732, + -0.2343102991580963, + 0.10500429570674896, + -0.28034430742263794, + 1.2267805337905884, + 1.0561333894729614, + -0.497364342212677, + -0.6143305897712708, + 0.24963727593421936, + -0.33136463165283203, + -0.01473914459347725, + 0.495918869972229, + -0.6985538005828857, + -1.0033197402954102, + 0.35937801003456116, + 0.6325868368148804, + -0.6808838844299316, + 1.0354058742523191, + -0.7214401960372925, + -0.33318862318992615, + 0.874398410320282, + -0.6594992280006409, + 0.6830640435218811, + -0.18534131348133087, + 0.024834271520376205, + 0.19901277124881744, + -0.5992477536201477, + -1.2126628160476685, + -0.9245557188987732, + -0.3898217976093292, + -0.1286519467830658, + 0.4217943847179413, + -0.1143646091222763, + 0.5630772709846497, + -0.5240639448165894, + 0.21152715384960177, + -0.3792001008987427, + 0.8266305327415466, + 1.170984387397766, + -0.8072142004966736, + 0.11382893472909927, + -0.17953898012638092, + -0.1789460331201553, + -0.15078622102737427, + -1.2082908153533936, + -0.7812382578849792, + -0.10903695970773696, + 0.7303897142410278, + -0.39054441452026367, + 0.19511254131793976, + -0.09121843427419662, + 
0.22400228679180145, + 0.30143046379089355, + 0.1141919493675232, + 0.48112115263938904, + 0.7307931780815125, + 0.09701362252235413, + -0.2795647978782654, + -0.3997688889503479, + 0.5540812611579895, + 0.564578115940094, + -0.40065160393714905, + -0.3629159033298493, + -0.3789091110229492, + -0.7298538088798523, + -0.6996853351593018, + -0.4477842152118683, + -0.289089560508728, + -0.6430277824401855, + 0.2344944179058075, + 0.3742927014827728, + -0.5079357028007507, + 0.28841453790664673, + 0.06515737622976303, + 0.707315981388092, + 0.09498685598373412, + 0.8365515470504761, + 0.10002726316452026, + -0.7695478200912476, + 0.6264724135398865, + 0.7562043070793152, + -0.23112858831882477, + -0.2871039807796478, + -0.25010058283805847, + 0.2783474028110504, + -0.03224996477365494, + -0.9119359850883484, + -3.6940200328826904, + -0.5099936127662659, + -0.1604711413383484, + 0.17453284561634064, + 0.41759559512138367, + 0.1419190913438797, + -0.11362407356500626, + -0.33312007784843445, + 0.11511333286762238, + 0.4667884409427643, + -0.0031647447030991316, + 0.15879854559898376, + 0.3042248487472534, + 0.5404849052429199, + 0.8515422344207764, + 0.06286454200744629, + 0.43790125846862793, + -0.8682025074958801, + -0.06363756954669952, + 0.5547921657562256, + -0.01483887154608965, + -0.07361344993114471, + -0.929947018623352, + 0.3502565622329712, + -0.5080993175506592, + 1.0380364656448364, + -0.2017953395843506, + 0.21319580078125, + -1.0763001441955566, + -0.556368887424469, + 0.1949922740459442, + -0.6445739269256592, + 0.6791343688964844, + 0.21188358962535855, + 0.3736183941364288, + -0.21800459921360016, + 0.7597446441650391, + -0.3732394874095917, + -0.4710160195827484, + 0.025146087631583217, + 0.05341297015547752, + -0.9522109627723694, + -0.6000866889953613, + -0.08469046652317047, + 0.5966026186943054, + 0.3444081246852875, + -0.461188405752182, + -0.5279349088668823, + 0.10296865552663804, + 0.5175143480300903, + -0.20671147108078003, + 
0.13392412662506104, + 0.4812754988670349, + 0.2993808686733246, + -0.3005635440349579, + 0.5141698122024536, + -0.6239235401153564, + 0.2877119481563568, + -0.4452739953994751, + 0.5621107816696167, + 0.5047508478164673, + -0.4226335883140564, + -0.18578553199768064, + -1.1967322826385498, + 0.28178197145462036, + -0.8692031502723694, + -1.1812998056411743, + -1.4526212215423584, + 0.4645712077617645, + 0.9327932000160216, + -0.6560136675834656, + 0.461549699306488, + -0.5621527433395386, + -1.328449010848999, + -0.08676894754171371, + 0.00021918353741057217, + -0.18864136934280396, + 0.1259666532278061, + 0.18240638077259064, + -0.14919660985469818, + -0.8965857625007629, + -0.7539900541305542, + 0.013973715715110302, + 0.504276692867279, + -0.704748272895813, + -0.6428424119949341, + 0.6303996443748474, + -0.5404738187789917, + -0.31176653504371643, + -0.21262824535369873, + 0.18736739456653595, + -0.7998970746994019, + 0.039946746081113815, + 0.7390344738960266, + 0.4283199906349182, + 0.3795057237148285, + 0.07204607129096985, + -0.9230587482452391, + 0.9440426230430604, + 0.26272690296173096, + 0.5598306655883789, + -1.0520871877670288, + -0.2677186131477356, + -0.1888762265443802, + 0.30426350235939026, + 0.4746131896972656, + -0.5746733546257019, + -0.4197768568992615, + 0.8565112948417664, + -0.6767723560333252, + 0.23448683321475983, + -0.2010004222393036, + 0.4112907350063324, + -0.6497949957847595, + -0.418667733669281, + -0.4950824975967407, + 0.44438859820365906, + 1.026281714439392, + 0.482397586107254, + -0.26220494508743286, + -0.3640787005424499, + 0.5907743573188782, + -0.8771642446517944, + 0.09708411991596222, + -0.3671700060367584, + 0.4331349730491638, + 0.619417667388916, + -0.2684665620326996, + -0.5123821496963501, + -0.1502324342727661, + -0.012190685607492924, + 0.3580845892429352, + 0.8617186546325684, + 0.3493645489215851, + 1.0270192623138428, + 0.18297909200191495, + -0.5881339311599731, + -0.1733516901731491, + -0.5040576457977295, 
+ -0.340370237827301, + -0.26767754554748535, + -0.28570041060447693, + -0.032928116619586945, + 0.6029254794120789, + 0.17397655546665192, + 0.09346921741962431, + 0.27815181016921997, + -0.46699589490890503, + -0.8148876428604126, + -0.3964351713657379, + 0.3812595009803772, + 0.13547226786613464, + 0.7126688361167908, + -0.3473474085330963, + -0.06573959439992905, + -0.6483767032623291, + 1.4808889627456665, + 0.30924928188323975, + -0.5085946917533875, + -0.8613000512123108, + 0.3048902451992035, + -0.4241599142551422, + 0.15909206867218018, + 0.5764641761779785, + -0.07879110425710678, + 1.015336513519287, + 0.07599356025457382, + -0.7025855779647827, + 0.30047643184661865, + -0.35094937682151794, + 0.2522146999835968, + -0.2338722199201584, + -0.8326804637908936, + -0.13695412874221802, + -0.03452421352267265, + 0.47974953055381775, + -0.18385636806488037, + 0.32438594102859497, + 0.1797013282775879, + 0.787494957447052, + -0.12579888105392456, + -0.07507286965847015, + -0.4389670491218567, + 0.2720070779323578, + 0.8138866424560547, + 0.01974171027541161, + -0.3057698905467987, + -0.6709924936294556, + 0.0885881632566452, + -0.2862754464149475, + 0.03475658595561981, + -0.1285519152879715, + 0.3838353455066681, + -0.2944154739379883, + -0.4204859137535095, + -0.4416137933731079, + 0.13426260650157928, + 0.36733248829841614, + 0.573428750038147, + -0.14928072690963745, + -0.026076916605234143, + 0.33286052942276, + -0.5340145826339722, + -0.17279052734375, + -0.01154550164937973, + -0.6620771884918213, + 0.18390542268753052, + -0.08265615254640579, + -0.2489682286977768, + 0.2429984211921692, + -0.044153645634651184, + -0.986578404903412, + -0.33574509620666504, + -0.5387663841247559, + 0.19767941534519196, + 0.12540718913078308, + -0.3403128981590271, + -0.4154576361179352, + 0.17275673151016235, + 0.09407442808151244, + -0.5414086580276489, + 0.4393929839134216, + 0.1725579798221588, + -0.4998118281364441, + -0.6926208138465881, + 0.16552448272705078, + 
0.6659538149833679, + -0.10949844866991044, + 0.986426830291748, + 0.01748848147690296, + 0.4003709554672241, + -0.5430638194084167, + 0.35347291827201843, + 0.6887399554252625, + 0.08274628221988678, + 0.13407137989997864, + -0.591465950012207, + 0.3446292281150818, + 0.6069018244743347, + 0.1935492902994156, + -0.0989871397614479, + 0.07008486241102219, + -0.8503749370574951, + -0.09507356584072112, + 0.6259510517120361, + 0.13934025168418884, + 0.06392545253038406, + -0.4112265408039093, + -0.08475656062364578, + 0.4974113404750824, + -0.30606114864349365, + 1.111435890197754, + -0.018766529858112335, + -0.8422622680664063, + 0.4325508773326874, + -0.2832120656967163, + -0.4859798848628998, + -0.41498348116874695, + 0.015977520495653152, + 0.5292825698852539, + 0.4538311660289765, + 1.1328668594360352, + 0.22632671892642975, + 0.7918671369552612, + 0.33401933312416077, + 0.7306135296821594, + 0.3548600673675537, + 0.12506209313869476, + 0.8573207855224609, + -0.5818327069282532, + -0.6953738927841187, + -1.6171947717666626, + -0.1699674427509308, + 0.6318262815475464, + -0.05671752244234085, + -0.28145185112953186, + -0.3976689279079437, + -0.2041076272726059, + -0.5495951175689697, + -0.5152917504310608, + -0.9309796094894408, + 0.101932130753994, + 0.1367802917957306, + 0.1490798443555832, + 0.5304336547851563, + -0.5082434415817261, + 0.06688683480024338, + 0.14657628536224365, + -0.782435953617096, + 0.2962816655635834, + 0.6965363621711731, + 0.8496337532997131, + -0.3042965829372406, + 0.04343798756599426, + 0.0330701619386673, + -0.5662598013877869, + 1.1086925268173218, + 0.756072998046875, + -0.204134538769722, + 0.2404300570487976, + -0.47848284244537354, + 1.3659011125564575, + 0.5645433068275452, + -0.15836156904697418, + 0.43395575881004333, + 0.5944653749465942, + 1.0043466091156006, + -0.49446743726730347, + -0.5954391360282898, + 0.5341240763664246, + 0.020598189905285835, + -0.4036853015422821, + 0.4473709762096405, + 1.1998231410980225, + 
-0.9317775368690492, + -0.23321466147899628, + 0.2052552700042725, + -0.7423108816146851, + -0.19917210936546328, + -0.1722569614648819, + -0.034072667360305786, + -0.00671181408688426, + 0.46396249532699585, + -0.1372445821762085, + 0.053376372903585434, + 0.7392690777778625, + -0.38447609543800354, + 0.07497968524694443, + 0.5197252631187439, + 1.3746477365493774, + 0.9060075879096984, + 0.20000585913658145, + -0.4053704142570496, + 0.7497360110282898, + -0.34087055921554565, + -1.101803183555603, + 0.273650586605072, + -0.5125769376754761, + 0.22472351789474487, + 0.480757474899292, + -0.19845178723335263, + 0.8857700824737549, + 0.30752456188201904, + 1.1109285354614258, + -0.6768012642860413, + 0.524367094039917, + -0.22495046257972717, + -0.4224412739276886, + 0.40753406286239624, + -0.23133376240730288, + 0.3297771215438843, + 0.4905449151992798, + -0.6813114285469055, + -0.7543983459472656, + -0.5599071383476257, + 0.14351597428321838, + -0.029278717935085297, + -0.3970443606376648, + -0.303079217672348, + 0.24161772429943085, + 0.008353390730917454, + -0.0062365154735744, + 1.0824860334396362, + -0.3704061508178711, + -1.0337258577346802, + 0.04638749733567238, + 1.163011074066162, + -0.31737643480300903, + 0.013986887410283089, + 0.19223114848136905, + -0.2260770797729492, + -0.210910826921463, + -1.0191949605941772, + 0.22356095910072327, + 0.09353553503751756, + 0.18096882104873657, + 0.14867214858531952, + 0.43408671021461487, + -0.33312076330184937, + 0.8173948526382446, + 0.6428242921829224, + 0.20215003192424777, + -0.6634518504142761, + -0.4132290482521057, + 0.29815030097961426, + -1.579406976699829, + -0.0981958732008934, + -0.03941014781594277, + 0.1709178239107132, + -0.5481140613555908, + -0.5338194966316223, + -0.3528362512588501, + -0.11561278253793716, + -0.21793591976165771, + -1.1570470333099363, + 0.2157980799674988, + 0.42083489894866943, + 0.9639263153076172, + 0.09747201204299928, + 0.15671424567699432, + 0.4034591615200043, + 
0.6728067994117737, + -0.5216875672340393, + 0.09657668322324751, + -0.2416689097881317, + 0.747975766658783, + 0.1021689772605896, + 0.11652665585279463, + -1.0484966039657593, + 0.8489304780960083, + 0.7169828414916992, + -0.09012343734502792, + -1.3173753023147583, + 0.057890523225069046, + -0.006231260951608419, + -0.1018214002251625, + 0.936040461063385, + -0.0502331368625164, + -0.4284322261810303, + -0.38209280371665955, + -0.22668412327766416, + 0.0782942995429039, + -0.4881664514541626, + 0.9268959760665894, + 0.001867273123934865, + 0.42261114716529846, + 0.8283362984657288, + 0.4256294071674347, + -0.7965338826179504, + 0.4840078353881836, + -0.19861412048339844, + 0.33977967500686646, + -0.4604192078113556, + -0.3107339143753052, + -0.2839638590812683, + -1.5734281539916992, + 0.005220232997089624, + 0.09239906817674635, + -0.7828494906425476, + -0.1397123783826828, + 0.2576255202293396, + 0.21372435986995697, + -0.23169949650764465, + 0.4016408920288086, + -0.462497353553772, + -0.2186472862958908, + -0.5617868900299072, + -0.3649831712245941, + -1.1585862636566162, + -0.08222806453704834, + 0.931126832962036, + 0.4327389597892761, + -0.46451422572135925, + -0.5430706143379211, + -0.27434298396110535, + -0.9479129314422609, + 0.1845661848783493, + 0.3972720205783844, + 0.4883299469947815, + 1.04031240940094 + ] + ], + "userProvided": false + } + } +} diff --git a/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap new file mode 100644 index 000000000..4bd0e2c3e --- /dev/null +++ b/dump/src/reader/snapshots/dump__reader__test__import_dump_v6_with_vectors-8.snap @@ -0,0 +1,780 @@ +--- +source: dump/src/reader/mod.rs +expression: document +--- +{ + "id": "e0", + "desc": "overriden vector", + "_vectors": { + "default": [ + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, 
+ 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 
0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1, + 0.1 + ] + } +} diff --git a/dump/tests/assets/v6-with-vectors.dump b/dump/tests/assets/v6-with-vectors.dump new file mode 100644 index 0000000000000000000000000000000000000000..9f8ed2ba11d233ed5a1fadd25a18e3d25c7e04a1 GIT binary patch literal 17539 zcmZtNQ+Fi{&?Vs5wr$%uaZYU8Hai{Lwrv|7+qOHl)xo^qteJ~hYij?6s;gZOaWo9b z|DK=gTwPa!4foP_fj+Py*gh}7OJ}}Z?}P@AB9=-lvq|7(Va!JGr;MUM{hpe~0gbSnv1S*PM0Aea8>+ ze(TFmu_AK)O`T)jCrQ1q@sDiXx$}>3|K-%bpN045>VMqf)!kdCKOR25?(auF-oV@? 
ztLyAn;ZwC=Ul-q=U)K}R+QWT!Uq7GcBkxPtipb~Z=Sz=6*YsasA4@|sGb8T@7Zf3) zeeXQq9{-+3PWdjLH@=Q{mazL|<{r=g&i(&lI6PP1_jcGvuoV8u<=es6-QCCA{af+j z?tN(}J3ajIF}iy147=}RrTMAn%GJ@AzPUxnwX*zIeY!n9qrLC6MqBTr!Q{W)^6$ja zyrni|;>`B8*R1v?P2aAWBH3lj7vI%3^;sqF>hvmb%k$=Cw$XXr+B81NjR!l|sqEcK z^R~2S14sTbjX&iv>J!y<%H`XaR-F#NuXmTWZMIzN>|6M}>R(#s^eh)kTgtZ6;~nj< zR3q*8Y~}>#mMq`Br(6+834N?fTWVcdi)Vz&K^p#Ceo&p_``A}H0*Ut~J+ku4`J!+5 zZ{;sjlbF3N7?oH1-0Dl^W>Y1rWd3kd?ls5Oj!mTK(!Vp+`kZzBn(gwu z@x1AJU$mf;*=Y^=twGOH-79igU%zf+WJo4dhFHl~ilTc^BwBWO(L)P;#mez`3@7A_VUyq-zHq9hg#K}CJ;B>vo>+8Js zjbRuyTadcfYMR8R->#_yR`q8UwuO4D&64>!Mf{pi>EFh$*1Opf`IS+JBIPL`u5T~W+vD6 zm~{PDCfIy59(|Rwg?YZ&D)QFA52$M8EyxvX(yoMz!Bz5deV?9Kl~-lFsY%#YH=mWH zhF`kXy~g(b4=&d9uf1cvwuR7M-?PpY4thmxV5n&xHttlH_2|H0yo=t}^(kK&z(rmO z4J}g_#*Uu6gpW1_jiJOWfbBl)N#elCVNKK!rW@952 z@Rj}DYE{L?)*aJ_SQ=%IBz4JO{ji|A^?m8jCBrY-f9&pX^atw}sYc^xR=}?`JGshM zOgD|9SzW@$R1hnllB}z%c_BybcACbOSZ#twh7T%Ojm%2h)*4$hHf>&CTX62b@L}Q& zGJ+=t3uOa-8%oyimZ^7Fq!=U@T|%`fH6}@2I=`Oc>LeE-INHg*z#nrvabL5lAM&^5s43jXM!0ylS7G!MysHvay z05d3E(KusrVh}VC12IUZ*?^xLON5lxDwU)r>qJ|P$Tg^@= zI`i1}ljsvj<*U~PeL1IA8rYK}<+)YGc-pjndyTPa&Xr6kJwsHfQ=Ks-< zzk$IYd9gNsRqC}*$JT6UQRjVP>D>%Mv=uO4dLrGztj$)F38HY%K*E5jE4kGJrYdRD z^Q`{9h)D`%_>&rSrh@VBtcfM>%PN`5K%dJ)`nqJekhDoMN|4Jp>8}`#`2$Z2Ff^KC zBN36i`qfRsg>2}uL^h$sJ%>Oh&cj?AH8SHx9A48&POoJ(_qLi~(`;Ug-?#Cz*-iJ> z>>p3IL~UxX%Z;)13j^;}NDQ3aUB?D#jG9W zXoDGfyM+Q;-Ml)2_L%3$!ebCBms4EgG82ZJ0D7`K^zEFig(gWVv!S6lN&^#2K21xq zZ3FyZ!CZnO5zVd=!O)I91WJLR%rm_qu_7aT6fCSV%}wD@)Y@DX#I%`oN*020VgAC+ z5H6ZFnHFg0STA8*F=A>6xax~H= zF-EUfQdOJ_%PdX-;Gox~_r!}P6P}f%6Qa&L8;@7mMR9_*&kk05cV&!_UjqER2T9UI zL8oHO9TQ*9Elm$@*xEp=C)2MRES_x7YaNPc4# zQ!tm&9~DPbZKv(h&|-k=#wcAqz_Mv1mXFI=j&$+}GK@bM^~{Hi%4`IT2VVwIK(3sg z>cLeqlQJyJ0P!9W92sQ;byHq-Uo;eo6X$6v5*^pVl}OAnVajJ|mpGLWfyo>iTa3CK zpS4liZa@l&T%IA;#$f=xhVzg%7ldsg^7(@qJ(cnHNw)epO`8(H~623t4V`S*p0??r07g<7xOkvg)V^Yg@Kapkb8Q3)5tDVF_G~7TvA! 
z;6FaRRp$A?am+g>6#{RE%uE)iC>S>vd#bW?LE7)^*(C(0C?q*`56G-DDq`i<%kf5*Jrt`??`Er9Cm^uZZ;IVWD1L-39DL_$zXPDY1 zGn+?~m$MWhb~OS8K}mx4n8h#Y9unHw2aI#WJ3$c4T_Vo zDJI;j@tRz@Cngl8#7D7FhUMO9hVe*OUV-J`*QJVWfIiq}d&Nu_==s&ebk+i~jax^a zBzMxn51#@}%T7l6Zk4M~ACalUO@F;6Z^eIaBbek9L~dWV2%G6|7*nhSCK?K3wk4Z^ z`AsL8h`KACw1#`{6bwV;Bv%kDXhw!HX5}=?2*`r~P&89JCtiCBNgbB01DXm#j5b^~ zwA%2ELDq2M{9CKoD$RJCsQJQUvArF{Qd=Y7RC`cjYZS|(7#0F7U5PU9WsjiaO5J?> zAyx2p4BI{^`+<=u7=;Orl5T~RYQNiq875N0e76Ikx%yGhx7}x6yERTK-Nt%MKT*8*{VNejntbklb+%7 zs;2o59gaQWP9rD@8fr))F)xl{3(UVOz=I;oSE2uS9JMBG*{Nn&Kmb8Nb#OwlU45IH z-PmeIlzpO{d`%5EVEV^#y}=aV<)Jbv6eKA~y%1=>EWFKDQlT%2p33a}UDp|kLx)tm zwiYakq3%jkCMT>y-ymQ};Vby1VdSy{A+6gfT#t^~Gl*T@2Z1z7TMA<%tYHomS37x#~0Ja%?W#{LIL~>L)SGyKsSb*V#p%Ubm)>(fr?6xjRdSj&L)nPB~PI)2rQ&IW>8JY zj$n4F(xHJ|S03mK_)}`*|FHK{-_{|rFlp?uWg{wTq-k^8Xo{}P{@N7)%OhcwqG6tr zS(&Q_Uh<(+bk(src{9B@#Fd0Ji(me^qD3A0RkXI-`vbxXJ{0AcT3-ZNR-szw|N8e0a1K zUs-r_Vh}xx8b(nD(N8p+ah3w9SYCpk*0y{O%6gN)tmIz}i9Y3pvUjk|;Xo?wl z(8{$DKo`k$mKrv*nz2m7rse`&Li*U>Bce{+Oob8-1fLYT{gghjct#vtg$>dZG)k7k zT&$mLbu{P;2x1oC{+&ZRn26`8Lg`{k$xZ)=!xV2qopQEiN1L`3#~Z(t7QPRvEUSkJ zoq+8bybGs?-$_7v&PrziIo?Bep;hb`kZXSw)ucOSf{K+`0%!znmA8z3kYQ_EB;}Ri z4espCQo@BKLr!qwF-%dAB?68e15~_PXBx?f%9W<|D;W1P2IUxux2|G`*zC>51Up-d zU&oka1fC=!8P+!F&x>L4#yCD5xLdc`1=_yO2=sRVZG+WHs;Be%~e=XbdP|TMf6`7HAAtSTsHatt7Rb=l> z@a3G`ivP`kQAz+PKJzYe`@Y9%`0yX6v4_ryd~NVleu`NVM>HI@60UbjR<`8MXT;Fe zc)M>7Vaj-Rbmiu#Ve3a?g<)#`WOba=7*gC8i($g@1Nfb@5Z$3&&6Giq!0D(JQ_e}7 z%k0?F^z1s+Lt$bsrd{H}6-Ck7r6i(F+^LpSSo9@-tGx?bfFgo5!=nf!V^lQ!C@8Gb zT7sT*wGh3aC8(ZKUubG{>>o)wMO^1@&%4M()^s+IRMyd<7Boaht(PQVSixD4+-)=z zcB%TphT2bPF9)}v>N>!FhRo&x!84dWp9+Er68+Y}GoP z$NVWPrIqIJe=B-a5_$QQ?pEWv-8n3 zNvjA)z~RcZD!XVWvZ0ZAiiwj95l4{lh>@?PX?p@mlkX25@Fm&n%|ya}bMuIk_Ci_6 zL$#xt2E<8q;Irn(a9SfXfXRXa#)oHdnc-oJ_IDdkgY`y6tc#c@>JS{gdjqIDUJMJ`la!ii3C6hc- z@=!k|g_|^Ho6&0%kfbOncAVy}k>5@TO-d?D4b|How7vD)*n7o6okAp_)ea(e^&H3@ z%%*mXzOf}@YttTB% z|G(Ir-2n$M^bjTyHQG!$`rjq1kx1h{EZ5F%v?OF0{3yFx6yaQmJW_O`d7uLcW=SYqVhapJ+TDEXjH*D=5%u~8Gf*sY 
zWvju?19bej0w-weziSEFArMOOL9EDS@4CB5k@vBxk>6M zX-5&hG5)|Sw@~*92{5&w_*K%cCzx3{uV<*$)*%fzqs)$u%*Wu|M@wzR=^v6VbvX3* zMJUwRB!wc39;v)rIF=O_caGhsXrz8%v zLu0>FI?ssDKKqGgj~lfZB3Wt(CK))D+9bIQvF=u}s6~LGOnNeO%?%PoiX0@Z)YdD4 zYdnTy$h3a_dlHi1mC-63=T#aMO01FaPyCtK!&&i@$>Trh4?%R=ku4a*|GdOE8nyn} z2}G@10s36k)nw`3xAbDO%9bd;Ln6a8$$11gJEU^){VBC0;m;wGKA@yA8C~d?Mg2Hj zDQTrP)E$O_+R!1yppr@S=)zO8iKGsNP?5u?)Z~fuI$B;~{cHODE(z+fsr|g;PAIIR;hZ5gH)<|WI+SFma(F)XQO>8Myb5KVB;E?)B zR}-n?@A$$p$I)iBlB24XzTFnUKXmf}pkIm_<=}YjEim>Q#%y*ezNx=}!0t$?QapV@ zcjL0m@uL*?!g5%Lq#i-c?A)wk71?dSjGnu8D3nAq}CWt zi~t+LfOhc`3V*TVu1*@Aeu@Zcj8aJkW^hVNDndr3R=Y}(iLn;=6zNQQkwVRGq!Vgw zIoE+FaKnzRbIC-dgQEaZX_)Qn*QU#}%@8?4t^kctk5nc3a^_`mva*!KWIx0`;Igh8 zXljbQF&+gXHddIoWC9WNcK_XK$P5lt)%ObNbikCH%lt&A*mmVM-9sS~{ZwunrsbsE zlx|WrAiyAz(7k}rRi2U>#UN+_s6ZW`0A>}cAdMPi!u_rhazm(3oloh7n8jWKBTe6= z$VjKlf=TbYIXiqw)Eo7SEMEgeDVPzg7^Aod4$=xcTRsj}Nnk*543(FH-Ab*;-8&3B zf(80etli1|C9rGAq2QzRb@Z_RqJGC}J%&MDSBBFL1UtyTArFoNA_~dMAUZ}$uMCm)~y{QGtfDdk{0!OQu4bE9)0V7W(f&rVp9Z(gks$`c?aeLjpb z#;~MZIiiqSL>Ffb4moL+aH0Onc#9Yg!&%w_qBlCOuULR^XTnO0K=H}w!G>oBaaXuY zv@DR?-2h5LZE}LG0W(kumrgVE<%?7uNFnx7DD*t(q041dL;JegIbwQ~ZkWX17-v~I z`AO{K0#3rBY{s0nw%iU(i*1L~et?LL0#GlJXLrIdSl(~Fw?n&G9HZ`_NJ(+%2Jj0V z@+|`ThXFokoYHk)iKStU^(gTY+^qDN00)nAB{VQEc6Yql1oq~LMpPCshBH3e20Xg^ z<$28BvkHx3E$9TsXwJkc_4wUsQ?mh3ZN!@-VMRETu6Dwjz~{LZW+k3UCQWjdX2E4^ z63~M~;~;)NLm1R*3CZI#xBe3ZwAebu2Wcoy`#+t0uHf4v#MwlkXeprz227v?A~H!s z%G)i*P*^>~5)=`9!xZL)P!`k*>rcx6lCpw#JDm6n6>!N}7jSHb{Ax_5hMf}PN`+X1 zBuaYdjkPF%3$;dg@xWgw_LYDGf>em4R(Ow~m<|9rcARF{RqOCBU>htS{w%&KeVQU8 zW!1Z}rQ(&t%de>yheb*%*(obZA>m9+x{_mtMs5JwB~D^KJJ5%eF5~sIs}T)Br56{T z!By0{42N8P;53_N52~}_kHm~wJxo@`x^p5|0aLm{v##_b8D+nie0beN&iAp{wp-$U zZ2Z1Aa06YP!XD)slS{2EieEC^)1Fr$yZU4sNBM6Q4?$-~3s9PfLczm-O=8`%8Y2+BIA^KT z4iT-~1^%Ka|9lC=e~*&+C~L|R|PugRz0}B z?3puP36)|6J)MB`S{jc%jEG<~+e};(74x%+W{b^Z_<{T5>Ix^u z+lQnm7piLpN_l@Hn5*srNTohal{gD?nS#oj(hxV-$c8WSqJjYvYV`S17bVk^gC4D= z$DRNO;D)r>YY#XpTpTheVguzTkD-DA3(JhC7OzEbuboMAFdOES164M44S@0yHtN1` 
z1FNGMZY}51l59{x!_QHIFHV!$cDa2Ba~*+0c>#%Vbf zH+%RHi2x3hPNL-COPGl73t<`ZL5KR5PttdbpcoUu<#}enfSn^D-WyOQ2kb15V_+z|+p}+?Dh+^|n}FG7VMzU5IFiQz?fc{A?rkN-5Fj-r{DdneDTb!Xy zJ)R->kQzb?4Y-**q;vj$QlOBcKwcHWO1B6T8SG$FL4im%eDa7i!8S8H2g&#xJOxQ3 z%{m}<9iGH+dqTtj&_agR^i!2ma5gXP^}6M_Az+S79~b;_0FYV;PFjd}2C^9UFK7%3 zU9GkL;7{j^h)|3xM54WiILYA_Lc#`@&~-ChgiP_lR)Qf!G5Dr2mc-=YEb5t5!&Ffj z+lDfU$o;V?XY5>og?en;7s72p_$@cJMf7a%lnpD)8t4?KWs%Xn=q;0Hr9lC2@kpOq z=h%p=zoUTY%H3>PU3f?$m+2(TwG-=zVL~V(Mna>kUr(?}xI7Rhbli`-^O!^NTFBj=)3PXl2jQ`7~2STZWX|EZEz1BBf@lnM`v(uMO`36feL|Y$%e@~K8H-Xxhd>d-pCHf zw|t{|Qjmm)fHicP(?p!G0$gC=RtBpYieH1al63a3eLUAI=I`ziIr#axe*M=RAAFui zuUV$IhcoYaZBJd@T2ytD2X;jx^=0=j?RY?oAdg4secUcER=4CS-i; zyF>(qka4S>QT}zqRQaoYz>$Kjqwegs%eV-t4R24Or;43;v( zq}hl@f^ui#s@YNs6=F#~pJp2?K7X>_U&C6g1cOols*I?hEZ*2~*1xb%nI2&4j*G9y zp9wiBX6(&BlfHkVv$=v}(?&;y2jyxAL{%2LhK5T!ZWJ{pHz=S0^><5!!&{gn`SPw9 zTG;Cd1Wbmfbs&pFHY*a5kz@0TGFcHpXuEPyqvFZE@@Gpo4*nzBp}?3>Cp188YKq>_ zgE-!DCD1HXG1$rptW6OYYEd2@tfudea!D}J!W&RxGdfu}+>mw1s?kbwRmUZa0hK^d z8}OJ!+zGJ%&=^BET!mGj`EzkAj6A>8SzIC}RzZqwfRU zXf+X+TG`}O*GW?@pcA`|HDG;@!`$V4_W$7Uf3#*V1dTwq0S5gFJBgx{wRlK%Q9eD; zcJ?n5ni0IXU0_&%rt#EAV%n5Ny;x0jkkoGlW}+5N(jEA}&6^Zn5lQ<`q8cJX)T}wp zrrH<3GKy7m+oO1qIBuY5S17D6gkl(4ZV{g)ennP+{3+grUIRr#vw*=x625AJ#K5u% zsK&%~Qh-x`B)UHQBy$8ymF+4$aEk(#C=R9t1#9yYen%M$B)*~ zwU(qtD*b0m|nckJ@=uCp6Tp{v@q1!I19$`6EN3T)Hf!9nx) zS)4E@B7RYsW8z^usJ4#Gf&T1D8tgB)08m$LeoG7NLRty=-(FnfA@P($WH=;cV}rr%!Lf|3o->JMU`iEd5Kw|e zvL3HG67UQZm5VbXOw}9fXWY5aci2m!nKW6~8-Z9?CTkA(pd17m@W?7wFc9KyBen(8qxuD_wS&+s$cXI) z1Iw)GDV?Os)e@)fmC-O62gCy)o`(&KLOCGUbt{12Km6u!BfnTYC!)!=O^pE_{B43= zqJii584g9r%*5389FA<&jiI#i815UJB&?YY_Pdm(w$Twp*NtC!_&2_%>w0W0ASei0Z(6j(Tv>U) zNUKsNA;xdUZ<8~22E`bAT(FKqB45C~8MLJigD(S33xbyU}u3rfgT=6xU@ht3on=fe+P1tHm7kuycl z#Tl!4q(q?LA;*+ti7!blv`#uLgnVMK<`@u;@U^x#w1=P5<{P=Dw_a?M74z|S+-MB+ zuHQzeQhl0KpuxfY3<(ju=+_;;{9Ut?pG{qt02U65t*&#kgHj{=aPh^PHCy`7qZY`mq#8Nsy4AHT zhaPlbu%sN6+juQniG&}_t5Uc5ShvnGUL~~~8{I-N1clVddL@J{_K~OzMUzA67H+np 
z{E|E9z}UVNHpH+{Ja19pa*r3>Fkkj3Hm@>CpWEk>#j>@z{o{B_70VrfWe4^^gy;j4DCX^2F#jZOW2#1n~NuRd+9+hO9jju$!1VIP74yhaH~OmVUxq&$?!K4EeD_EgBKf{!k!VN=xZPG zTkQ7(mj@RItlR>3IcaW}5SE-~!R3m5ctBHrkHJ#;&i{;fw2>3%zrfeHQM)>&dURpe zQ>o?-MQPaZBvet6L$&pUvG@DC#0DaTGlIPy?VjZjUbIOl)b`TKy`_=!loS9Dg^K0O zQ_Cr>i&=qq9nZh*_Ml{4b!}QAy+xsqJz=I249HEw9vtxy<>UOOa%CVC7@=^y-un?5 zd?2gc7}UfvLL5M$d4A~uzn|d~+`^=J%18Ub5p-`&!!TKB4phRoR#sV(b1nLAl(*a{?8}UQ5pV

?5&|K+bKfN?Il8*@tFQ1&$%q=BW^gEU?60)HcZZbC2arZg6l{D?Pv!@$Lbgo?68?6u4i7Yc6f{vRrj?}!1m8S(<%FdLeEdOr+o_5S~q3M%&Pj~bZ0{9Z%(GLMOmk%m0 zDV}HXhuAH0p{j+0`YsV5%|ZpHIYRBPmNX%sz>Yp4V)GmU>=L;IHqhfBt~)rpWqe0^ zs}hl?20jwrOCo+j14Y}D#SbRJ)(&5O79l2RPx5$`3fw4Ce{0dXi$oj@+}Wv5(@&@1 z&|dQ1>O9ukxkDu|8AT+69W_IO%LEh)^%&xxxTX`gp}`aCP)|wXIPY_nX12<0NHmIR zs~H<}B&q7v=;7io;x0~5V1qEt$#~K4l;9v(sQ?K4olM1mgcfZU)&xjBP<&KSnAn(K z9tI!Np*1WF32xYcUex1pRR;u8YkSy|G}N}oFt?gCB&tEgO+L62zdE3>LvP@VWUQ%j zUecJ`f+O532Ndhgs2y-UkWpc$I>5K&!_nePFRNc^NL)GH!P6G2v0zc{6!*(c#dw`U z2_)0E{~_h3$S{m=ljktm&2$NlYr8?pmL=juL6w};jDR{fq)AFtM837Ai>Q$2Rb0`^ zOz|$em6AwFZ8atp@fWo#7>>{p!wQz<4{``+ns&kPbO@y4z`Sq$@IV6?FL6udF=Hk6 z>Wpfqm4kbOXgoZxiZPY^v}6@gXsap(#V`-2!ssWoq=Fs9)M&xE)6m+&8_EP%=&pha z0E#FXxN{WAUS_ep4VEJZ309uyQviXdE#)D_+rWjYS8k`J$PzM_dXhV6B*>IFI}lxN4F*9*Z^C>9XJ@io!Ojqs z6j~;)dE7|&%*}&HT3tcSf17_AUGR2L?sxF|*p9o@E7r@Ti-Mr-2up2_s+dN*8)j=^ zMdsFCx&!pcpbzcFi78Wb#zNgrj57R{;VfbcOPemK4`V8GR??YB>yC)6cm~&?MgBXp zk_~_Zn>`(gSj3eGj?-rzOT;1?Pt>=Dj`0`|E;shNZ4r+pt$5G}t!C zax|m!B=%UrYJ)lfPuV>VGh%ZmO2Z(uQO{M> za4QZ5tycl@t40JWs2Y_+IfW`#IQi;b(mTW9y3KS9lXS{612RPIh4!CH(A z2ssKyG^}o@k3qK(0=8}eI4w#%qY$*7yEk#}0A(@2Qv!>`HHcJtL3Yh6!fIia@jvE9+rk;ll9U#9KI z+S+(p+Q9%y$B-onBWfe`;JFD`bbfEH%FC+0;5&B>o?d0@WfJa!mjy*G31;y@iVD1I zJSZJKWGrQ-1A(B2w3*GbCuA^SF4IcXI(}R6a8|R?zZr`_KCF)!eVt&6390iQ<5F2v#qyAL` zGz}H8u0=-V_uCmFWDd*|Wr>Tt1TBH@%hWHW7Z4<|Ml55*Iw{7~6!#(=Sv8}BcL!Dx zetK5~gWA@R5PmKFwb^VWgjPzAqeZrlcyW4In~8bb)YQer;OLr^FzkX00eADTK4C zpWS}tK%B%WpqCfoN+N)gQ$F=br_}vb{k^8%W`%2!1rJ4$J+JtOUyF;dA|M=$<;kwl z;|@7%qSVZPfb@1EhXWWYm;WaUtyxC3hyUG-0LuoL*O&!f%gYgRJ-`{!wg{3;H*|C0 zh-cd(;)4Alz5?XKvb4c7DyD}dF_AFt$6i+c)C=R(iEYR{DYovlQyUYUt#08Y#eaXr z1BbZN{|o9Hx*97Oo``aJdk*WApme@Zqns8d+tQgh9$mypZCcctL>G+hm~k&IhDd9u zTNr;nk+;pGsi_=}aSV-iR#bEYBhSLhK-h3U5Q4@vOoRAAAf{p6NG@vGRMUJhFFDQ1 zYQGb!HauFq=_fjXC6@Ii&o4+j0CVg~&tD9tVHNBiPSJ~$2l*EyS&FFMnuFx7qb>G* z-bHO$@UMnz<7Tu@@TEc;PnZ z<~Y=!U#Mm%bpd3o210ML;+wHe6*mYtR19c|7ZKlTCxgsC%(k}a7XxOx9BndpdQLP? 
zi{gQQ(9Pdb36qgZ*3mrS!0j-&;fF6!$=)15GYIdLIR=D9B_OGdfd`WJ35$~EyhNFc zusRuxvJ>Qo%5oSEd8FL`^dKzH?g>O98Pj&&ChlMocL%iwwa#zg!kKqeep?qMbKoyV zW=VUVpUZMEds=&|F&l%;Wx-gmBr}302F4x!4y|lbqryd@x^0mpmq%s-b<*|5bjVo5 zFAU%MF+%j9#wB9aOGBK@Yey()5A6(^Kpa3R8(M6e8Hy+2TRd;$)5us^^?s8JwI@(% z%-(<*P{eYa$qZ2*qv2I*zwow-gMYPd<8{0yr-JsGB%H^x$4_KEIhR2OFLHTyB~zKk z&S!<=cu8=f$2oZdm(xqLoMlL96lKAK^6^a0gzjR!CWA5O?WM9%srpXbD@v9ZVWt*l!+H9*{cOU^c5jn2J+?W5A-w%M! z9bMwmyJ*Dr?fTP}O^5NfTOeqN(xKt5&8^YL1!PilsnI@Y)+^5yfVR>uaYb;0L{(-Z zRW^DPh+XOpx8E3$`7zzW&Ni*8YNLfQ znUb>Ihe0_gUF-jX_kge1ZdsiBReK??z zXFuLiggN!L0T+@Torw$id;vA=O z9>CGhuTN;PsCRE>TV{c!voB*HRs)^fe6B9TVh7IeIE21|%Sf7Ou=TeEO3Ne(dYGgO z=LTv8skQ|=JQ|7_yJ(GPQPrBKl8Aap{6jrP^*0*!4GcK1nz?MyV9m{3#>R_k{zPHk|tC-VrNDp#z20zZKz32J43 z31iH*V47NEj6edMoI7H;2?g@K)!60p-*F-z9@Adhyku~W0oC9$Q7uIvt9)m-c8H8* zi{GGL@SX;-3k2VHRrw#s1+a7hbl<}fTw20A_Ia9~%9BJd)Qp8_0No5o0As&%@W7o| zV0O_)mDmQQ?bm7geV{C&`C3luD%^>w`aZ;odf zw;E9sICQO)&{8bO-ETJ0eZQO*IXdI;?%Hy#d7C-YDXVDR4FqeB^>>HxzC{Eu2AKHb z&g|3WxnD+je1c|51e!3nQIM_fB)XPW6h{fmG_^P6RxvBVZ(lV;qSi++Rq$uHf58WP zvKqiL(^-VA%v)R^l!)X4b>$F$Fcd2wriy$x-2z7~%4CBqB^7DnAXTT1)<4=thqgl? 
zN+jEGYq-}fS03;@;_vp-X3Y~63XN1Kdg@G$;iU%NrAe*?X;Mn?ATXQLKK}H;Tp=E3H$6OZBuTi?5aj9|X zN!o9T);U`v%-#5Dh{lxGFDZATcCHjd0p!`$A+fc&S8b=A!6G6p|C%pxR%KJs$c+3I zreZ9LB(oNb>sFuw$&?>zpg(O~^ak95QO4Q2^pE^~?3A>z5C^R{A zlY zJ8Ot6_qI0V+xntGG~&;A^YCwJ-N%d{@@A){rujokC(y{aezGuu?<+I*ZY~#7EDVAy zh=oZciR?M+Rq5+}H-Mp2mEJ`B#S>ZD)!6^&tND&F%Q6$olBAK%(gCW)vrW~_r2ztv0} zi}_djZ>8bBx;K#&KBX$n0mGGkE z-R@KSnrGOGeOSm&i`-dTp2Xr#8JrI66u%veXbe6*!d@hJCGCP|tbGB#w|g}$Xb2%_ zwB0XJ)ZG36DkY=1czg1AXy<}c=uuvs(a+s5_(DT7`DRNEyjJosXK^XcCoZBKsZ*oq zQ~to%f##|)X!Zc@+q~z~$^`FDZ6casrCvnL7SR23Z~zs4>KLaZMK&%3PJUdE9SDoX z+*N^LI^nfRrSMI5kf48jLpvPA{KNU0)R zT!Lo5svY{?QCW4DMwcCF?X`tyasnLN2!fV%uLzhB5ewWEK&{0EYolg-QXR5F$Ubik zG&oXz6W17VSGMCZb=*~uEiZ**1D%t~x%3ta_Q(G>#-Eze_g4j;!C82jh7Ei)m*SD2 z^LC|G!8wwIJ+XR}=eM~I!j456r-h5RF&q?2+Utg>K!G2H6*P^!!lg6hgT9B@a1#=l zcMAHGKATb)F^j-x@>OU1{^lpB@8Gk2{@rtVcs4$v$FqL!AoABh-=cg|c)alcFaF=c zGK{OsB;SQGhZBD<|M~wPVf#hH))2o-a5FnQcQc#!vvvKvl6-qE0UcK`L9JE|dHk*z zSan}ctizGVG7M!9ob*U7@FZJtnEdqrvfWDqqVLbd9wGlRb>r;LC2r?v zE8Gd+N0P_Z{FBx+pMGAPJjVcS$dx}V3B6@MV6{ZgKj1wO9s1jTF5WM=Ha?!N_YdDU z{XD(p{$FZHZ!b7L=)^u@W$?@0Y=i$oE$a8AVHs2Y$?`Jc2f4t${mTqN#b$58Q|Ht9Q z_rpW()W7$cgO!omSrYcw2lhIXogwCW=2jBTdZauDuMxEIw%wg=Vuk$P&W^)zL%q+v zp}lh#x0Zw1%W80c|Bs?ej32*0yL*56cm{;^{^FYp_(_Dj#QukT0KN{);!HQsPDZ~3Ay^Y{@QuIT=VzG7ri?@?TCB%x!@D%PbS_UsR`cr*FNf> zF;&0rAEqyKt|-)h$-MpT@BSmqXQL4UlCIbrzUK4y=d<-KKio{<;|KYUFYP=j=HhwB z5t;iVTyOpTIs56!9+lbuy8T_&ynB3^SoMi`@6z_;2b=`tcID@t^PeK~PVu*Yz#8k1 z@XZf#JV?n$Pxi#!4h)Et0@0V+!*6kr@1>sq_EZ=I?5_7yxcuGyEB{&fe~>!skN@<~ zPsisEGdc6OtT&M&a1Qy$R;S@%+#K6)XhP`}}7ofJUbaQaVrV^?6IL zpE2+DSv%j(p1tgc_N?#q@baA2{=-W$Jtt$%2jeSA?-nfakZUU5xKLpF@!8!m8(Wx^ z-}x%kRAiK%`(erX9Z>51hXQzi<8n|IJiGer?zhUYv>0c1Cl|LTSJ!tZXUAo_Rv7KP z>G{dup5618|9bM%Yac_b6sNhq(g!}de|dTF>N}S6C+Er)zqXold-<}}SNYlNxt%}b zVXn_!&eyM=z07HQ)ehUM{p{73SM#sGw}oum_6M$`k9@2H{CHk>XSct*ee%P;{)GB3 zP56)X-w*Ke{(lsj{jm_e!w(<~e@gv-j`Xc)vLpS9$#bKhPaPhk)OQ*0pguiJN)=}6b~c5 z5W?`=YtAa1=u+tOPmctXcW7L`{4bgR_jRQNpm4cqjyt544^FV2rc6wayM 
z0M^~UAl1~Hhw~RF|2oI^uct~}@2~imH0NKR!{oR==M)!libdf1`0Xhkmhj<_n}a0a zmip`E#ntbSlDoT4ZeM%^QSkQpo5RKBMWn#bUtPcX$d`Nj-@g6)?~$qbqWh7A<2@3! zj$=ac`y@(-Hs)K*)ZdY)_9tlhUxcCEUQ|y^?^}P*9;_pI*3kR*K!=v++AWOt!xr?% zE%@fu+F#_sQ*~1;?00OzIE8#G?(dZ-UcEYee18$&o5RVQi`!f6$f?4~^OMi6?@wOdUtIcciZFE$U0WwF58A!M$=%Io z-n{D*-S4?}tZWe^dy$KJ^|Wkzb_!`Q$-T^F2ml9XU&Xuc3}Z2aoc5 z$4KnoVI;oMNI%+%M?3N3uoK^|V2j>CL-u_`M*AozO}?06{8y?(40KE57bkFUqq Q|Bu)I10^F{h5+IK0As3ms{jB1 literal 0 HcmV?d00001 From afcd7b9f0c02178bf6f71fc26a078538666afa3a Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 16 May 2024 18:12:26 +0200 Subject: [PATCH 36/56] Test hybrid search with hf embedder --- meilisearch/tests/search/hybrid.rs | 121 ++++++++++++++++++++++++++--- 1 file changed, 112 insertions(+), 9 deletions(-) diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 67f7909b9..028b341cb 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -5,7 +5,10 @@ use crate::common::index::Index; use crate::common::{Server, Value}; use crate::json; -async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> { +async fn index_with_documents_user_provided<'a>( + server: &'a Server, + documents: &Value, +) -> Index<'a> { let index = server.index("test"); let (response, code) = server.set_features(json!({"vectorStore": true})).await; @@ -34,7 +37,39 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde index } -static SIMPLE_SEARCH_DOCUMENTS: Lazy = Lazy::new(|| { +async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> Index<'a> { + let index = server.index("test"); + + let (response, code) = server.set_features(json!({"vectorStore": true})).await; + + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "exportPuffinReports": false + } + "###); + + let 
(response, code) = index + .update_settings(json!({ "embedders": {"default": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "documentTemplate": "{{doc.title}}, {{doc.desc}}" + }}} )) + .await; + assert_eq!(202, code, "{:?}", response); + index.wait_task(response.uid()).await; + + let (response, code) = index.add_documents(documents.clone(), None).await; + assert_eq!(202, code, "{:?}", response); + index.wait_task(response.uid()).await; + index +} + +static SIMPLE_SEARCH_DOCUMENTS_VEC: Lazy = Lazy::new(|| { json!([ { "title": "Shazam!", @@ -56,7 +91,7 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy = Lazy::new(|| { }]) }); -static SINGLE_DOCUMENT: Lazy = Lazy::new(|| { +static SINGLE_DOCUMENT_VEC: Lazy = Lazy::new(|| { json!([{ "title": "Shazam!", "desc": "a Captain Marvel ersatz", @@ -65,10 +100,29 @@ static SINGLE_DOCUMENT: Lazy = Lazy::new(|| { }]) }); +static SIMPLE_SEARCH_DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "title": "Shazam!", + "desc": "a Captain Marvel ersatz", + "id": "1", + }, + { + "title": "Captain Planet", + "desc": "He's not part of the Marvel Cinematic Universe", + "id": "2", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "3", + }]) +}); + #[actix_rt::test] async fn simple_search() { let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let (response, code) = index .search_post( @@ -98,10 +152,59 @@ async fn simple_search() { snapshot!(response["semanticHitCount"], @"3"); } +#[actix_rt::test] +async fn simple_search_hf() { + let server = Server::new().await; + let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + + let (response, code) = + index.search_post(json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2}})).await; + snapshot!(code, @"200 OK"); 
+ snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###); + snapshot!(response["semanticHitCount"], @"0"); + + let (response, code) = index + .search_post( + // disable ranking score as the vectors between architectures are not equal + json!({"q": "Captain", "hybrid": {"semanticRatio": 0.55}, "showRankingScore": false}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###); + snapshot!(response["semanticHitCount"], @"1"); + + let (response, code) = index + .search_post( + json!({"q": "Captain", "hybrid": {"semanticRatio": 0.8}, "showRankingScore": false}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###); + snapshot!(response["semanticHitCount"], @"3"); + + let (response, code) = index + .search_post( + json!({"q": "Movie World", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": false}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}]"###); + snapshot!(response["semanticHitCount"], @"3"); + + let (response, code) = index + .search_post( + json!({"q": "Wonder replacement", "hybrid": {"semanticRatio": 0.2}, "showRankingScore": 
false}), + ) + .await; + snapshot!(code, @"200 OK"); + snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###); + snapshot!(response["semanticHitCount"], @"3"); +} + #[actix_rt::test] async fn distribution_shift() { let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"semanticRatio": 1.0}}); let (response, code) = index.search_post(search.clone()).await; @@ -133,7 +236,7 @@ async fn distribution_shift() { #[actix_rt::test] async fn highlighter() { let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let (response, code) = index .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0], @@ -184,7 +287,7 @@ async fn highlighter() { #[actix_rt::test] async fn invalid_semantic_ratio() { let server = Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; let (response, code) = index .search_post( @@ -256,7 +359,7 @@ async fn invalid_semantic_ratio() { #[actix_rt::test] async fn single_document() { let server = Server::new().await; - let index = index_with_documents(&server, &SINGLE_DOCUMENT).await; + let index = index_with_documents_user_provided(&server, &SINGLE_DOCUMENT_VEC).await; let (response, code) = index .search_post( @@ -272,7 +375,7 @@ async fn single_document() { #[actix_rt::test] async fn query_combination() { let server = 
Server::new().await; - let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await; + let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await; // search without query and vector, but with hybrid => still placeholder let (response, code) = index From b17cb56dee5a21574d3a35b2e92a17f04a44db47 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 16 May 2024 18:13:27 +0200 Subject: [PATCH 37/56] Test array of vectors --- milli/src/vector/parsed_vectors.rs | 59 ++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/milli/src/vector/parsed_vectors.rs b/milli/src/vector/parsed_vectors.rs index bf4b9ea83..4e06177de 100644 --- a/milli/src/vector/parsed_vectors.rs +++ b/milli/src/vector/parsed_vectors.rs @@ -147,3 +147,62 @@ impl VectorOrArrayOfVectors { Self { inner: Some(either::Either::Right(array_of_vec)) } } } + +#[cfg(test)] +mod test { + use super::VectorOrArrayOfVectors; + + #[test] + fn array_of_vectors() { + let null: VectorOrArrayOfVectors = serde_json::from_str("null").unwrap(); + let empty: VectorOrArrayOfVectors = serde_json::from_str("[]").unwrap(); + let one: VectorOrArrayOfVectors = serde_json::from_str("[0.1]").unwrap(); + let two: VectorOrArrayOfVectors = serde_json::from_str("[0.1, 0.2]").unwrap(); + let one_vec: VectorOrArrayOfVectors = serde_json::from_str("[[0.1, 0.2]]").unwrap(); + let two_vecs: VectorOrArrayOfVectors = + serde_json::from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap(); + + insta::assert_json_snapshot!(null.into_array_of_vectors(), @"null"); + // 👇 is the the intended behavior? would rather expect [] here, but changing that is a breaking change... 
+ insta::assert_json_snapshot!(empty.into_array_of_vectors(), @r###" + [ + [] + ] + "###); + insta::assert_json_snapshot!(one.into_array_of_vectors(), @r###" + [ + [ + 0.1 + ] + ] + "###); + insta::assert_json_snapshot!(two.into_array_of_vectors(), @r###" + [ + [ + 0.1, + 0.2 + ] + ] + "###); + insta::assert_json_snapshot!(one_vec.into_array_of_vectors(), @r###" + [ + [ + 0.1, + 0.2 + ] + ] + "###); + insta::assert_json_snapshot!(two_vecs.into_array_of_vectors(), @r###" + [ + [ + 0.1, + 0.2 + ], + [ + 0.3, + 0.4 + ] + ] + "###); + } +} From 9969f7a638102473fa6f404c1bffcec9f1d866a7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 20 May 2024 10:23:12 +0200 Subject: [PATCH 38/56] Add test on index-scheduler --- index-scheduler/src/batch.rs | 3 - index-scheduler/src/lib.rs | 230 ++++++++++++++++++ ...x_scheduler__tests__import_vectors-12.snap | 19 ++ ...ex_scheduler__tests__import_vectors-2.snap | 20 ++ ...ex_scheduler__tests__import_vectors-4.snap | 23 ++ ...ex_scheduler__tests__import_vectors-6.snap | 11 + ...ex_scheduler__tests__import_vectors-9.snap | 19 ++ ...ndex_scheduler__tests__import_vectors.snap | 20 ++ .../Intel to kefir succeeds.snap | 49 ++++ .../lib.rs/import_vectors/Intel to kefir.snap | 48 ++++ .../import_vectors/adding Intel succeeds.snap | 45 ++++ .../import_vectors/after adding Intel.snap | 44 ++++ ...ter_registering_settings_task_vectors.snap | 36 +++ .../settings_update_processed_vectors.snap | 40 +++ 14 files changed, 604 insertions(+), 3 deletions(-) create mode 100644 index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-12.snap create mode 100644 index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap create mode 100644 index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-4.snap create mode 100644 index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-6.snap create mode 100644 index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-9.snap create mode 100644 
index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap create mode 100644 index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 40398dc37..1f5ec76b9 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -952,9 +952,6 @@ impl IndexScheduler { .into()); }; - /// some tests to consider: - /// - /// - dump, then import, then change a document with autogenerated vectors for (embedder_name, embeddings) in embeddings { // don't change the entry if it already exists, because it was user-provided vectors.entry(embedder_name).or_insert_with(|| { diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index dd2b296f6..f743422a7 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1774,6 +1774,7 @@ mod tests { use big_s::S; use crossbeam::channel::RecvTimeoutError; use file_store::File; + use insta::assert_json_snapshot; use meili_snap::{json_string, snapshot}; use meilisearch_auth::AuthFilter; use meilisearch_types::document_formats::DocumentFormatError; @@ -4982,4 +4983,233 @@ mod tests { ---------------------------------------------------------------------- "###); } + + #[test] + fn import_vectors() { + use meilisearch_types::settings::{Settings, Unchecked}; + use milli::update::Setting; + + let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]); + + let mut new_settings: 
Box> = Box::default(); + let mut embedders = BTreeMap::default(); + let embedding_settings = milli::vector::settings::EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::Rest), + api_key: Setting::Set(S("My super secret")), + url: Setting::Set(S("http://localhost:7777")), + dimensions: Setting::Set(384), + ..Default::default() + }; + embedders.insert(S("A_fakerest"), Setting::Set(embedding_settings)); + + let embedding_settings = milli::vector::settings::EmbeddingSettings { + source: Setting::Set(milli::vector::settings::EmbedderSource::HuggingFace), + model: Setting::Set(S("sentence-transformers/all-MiniLM-L6-v2")), + revision: Setting::Set(S("e4ce9877abf3edfe10b0d82785e83bdcb973e22e")), + document_template: Setting::Set(S("{{doc.doggo}} the {{doc.breed}} best doggo")), + ..Default::default() + }; + embedders.insert(S("B_small_hf"), Setting::Set(embedding_settings)); + + new_settings.embedders = Setting::Set(embedders); + + index_scheduler + .register( + KindWithContent::SettingsUpdate { + index_uid: S("doggos"), + new_settings, + is_deletion: false, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_settings_task_vectors"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); + let task = meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + handle.advance_n_successful_batches(1); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "settings_update_processed_vectors"); + + { + let rtxn = index_scheduler.read_txn().unwrap(); + let task = index_scheduler.get_task(&rtxn, 0).unwrap().unwrap(); + let task = meilisearch_types::task_view::TaskView::from_task(&task); + insta::assert_json_snapshot!(task.details); + } + + let (fakerest_name, simple_hf_name, 
beagle_embed, lab_embed, patou_embed) = { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + let configs = index.embedding_configs(&rtxn).unwrap(); + // for consistency with the below + #[allow(clippy::get_first)] + let (name, fakerest_config) = configs.get(0).unwrap(); + insta::assert_json_snapshot!(name, @r###""A_fakerest""###); + insta::assert_json_snapshot!(fakerest_config.embedder_options); + let fakerest_name = name.clone(); + + let (name, simple_hf_config) = configs.get(1).unwrap(); + insta::assert_json_snapshot!(name, @r###""B_small_hf""###); + insta::assert_json_snapshot!(simple_hf_config.embedder_options); + let simple_hf_name = name.clone(); + + let configs = index_scheduler.embedders(configs).unwrap(); + let (hf_embedder, _) = configs.get(&simple_hf_name).unwrap(); + let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo")).unwrap(); + let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo")).unwrap(); + let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo")).unwrap(); + (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) + }; + + // add one doc, specifying vectors + + let doc = serde_json::json!( + { + "id": 0, + "doggo": "Intel", + "breed": "beagle", + "_vectors": { + &fakerest_name: { + // this will never trigger regeneration, which is good because we can't actually generate with + // this embedder + "userProvided": true, + "embeddings": beagle_embed, + }, + &simple_hf_name: { + // this will be regenerated on updates + "userProvided": false, + "embeddings": lab_embed, + }, + "noise": [0.1, 0.2, 0.3] + } + } + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0u128).unwrap(); + let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: 
S("doggos"), + primary_key: Some(S("id")), + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after adding Intel"); + + handle.advance_one_successful_batch(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "adding Intel succeeds"); + + // check embeddings + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + let embeddings = index.embeddings(&rtxn, 0).unwrap(); + + assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true"); + assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); + + let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let doc = obkv_to_json( + &[ + fields_ids_map.id("doggo").unwrap(), + fields_ids_map.id("breed").unwrap(), + fields_ids_map.id("_vectors").unwrap(), + ], + &fields_ids_map, + doc, + ) + .unwrap(); + assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); + } + + // update the doc, specifying vectors + + let doc = serde_json::json!( + { + "id": 0, + "doggo": "kefir", + "breed": "patou", + } + ); + + let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(1u128).unwrap(); + let documents_count = read_json(doc.to_string().as_bytes(), &mut file).unwrap(); + assert_eq!(documents_count, 1); + file.persist().unwrap(); + + index_scheduler + .register( + KindWithContent::DocumentAdditionOrUpdate { + index_uid: S("doggos"), + primary_key: None, + method: UpdateDocuments, + content_file: uuid, + documents_count, + allow_index_creation: true, + }, + None, + false, + ) + .unwrap(); + index_scheduler.assert_internally_consistent(); + + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir"); + + 
handle.advance_one_successful_batch(); + snapshot!(snapshot_index_scheduler(&index_scheduler), name: "Intel to kefir succeeds"); + + { + // check embeddings + { + let index = index_scheduler.index("doggos").unwrap(); + let rtxn = index.read_txn().unwrap(); + + let embeddings = index.embeddings(&rtxn, 0).unwrap(); + + // automatically changed to patou + assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true"); + // remained beagle because set to userProvided + assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true"); + + let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1; + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let doc = obkv_to_json( + &[ + fields_ids_map.id("doggo").unwrap(), + fields_ids_map.id("breed").unwrap(), + fields_ids_map.id("_vectors").unwrap(), + ], + &fields_ids_map, + doc, + ) + .unwrap(); + assert_json_snapshot!(doc, {"._vectors.A_fakerest.embeddings" => "[vector]"}); + } + } + } } diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-12.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-12.snap new file mode 100644 index 000000000..718ea229c --- /dev/null +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-12.snap @@ -0,0 +1,19 @@ +--- +source: index-scheduler/src/lib.rs +expression: doc +--- +{ + "doggo": "kefir", + "breed": "patou", + "_vectors": { + "A_fakerest": { + "embeddings": "[vector]", + "userProvided": true + }, + "noise": [ + 0.1, + 0.2, + 0.3 + ] + } +} diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap new file mode 100644 index 000000000..bc16fc8be --- /dev/null +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap @@ -0,0 +1,20 @@ +--- +source: index-scheduler/src/lib.rs +expression: task.details +--- +{ + "embedders": { + "A_fakerest": { 
+ "source": "rest", + "apiKey": "MyXXXX...", + "dimensions": 384, + "url": "http://localhost:7777" + }, + "B_small_hf": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo" + } + } +} diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-4.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-4.snap new file mode 100644 index 000000000..013115a58 --- /dev/null +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-4.snap @@ -0,0 +1,23 @@ +--- +source: index-scheduler/src/lib.rs +expression: fakerest_config.embedder_options +--- +{ + "Rest": { + "api_key": "My super secret", + "distribution": null, + "dimensions": 384, + "url": "http://localhost:7777", + "query": null, + "input_field": [ + "input" + ], + "path_to_embeddings": [ + "data" + ], + "embedding_object": [ + "embedding" + ], + "input_type": "text" + } +} diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-6.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-6.snap new file mode 100644 index 000000000..712a62c77 --- /dev/null +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-6.snap @@ -0,0 +1,11 @@ +--- +source: index-scheduler/src/lib.rs +expression: simple_hf_config.embedder_options +--- +{ + "HuggingFace": { + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "distribution": null + } +} diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-9.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-9.snap new file mode 100644 index 000000000..002a42e59 --- /dev/null +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-9.snap @@ -0,0 +1,19 @@ +--- +source: 
index-scheduler/src/lib.rs +expression: doc +--- +{ + "doggo": "Intel", + "breed": "beagle", + "_vectors": { + "A_fakerest": { + "embeddings": "[vector]", + "userProvided": true + }, + "noise": [ + 0.1, + 0.2, + 0.3 + ] + } +} diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap new file mode 100644 index 000000000..bc16fc8be --- /dev/null +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap @@ -0,0 +1,20 @@ +--- +source: index-scheduler/src/lib.rs +expression: task.details +--- +{ + "embedders": { + "A_fakerest": { + "source": "rest", + "apiKey": "MyXXXX...", + "dimensions": 384, + "url": "http://localhost:7777" + }, + "B_small_hf": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "documentTemplate": "{{doc.doggo}} the {{doc.breed}} best doggo" + } + } +} diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap new file mode 100644 index 000000000..6b285ba56 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -0,0 +1,49 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: 
NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, 
input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,2,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### File Store: + 
+---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap new file mode 100644 index 000000000..6f23d96fd --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -0,0 +1,48 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: 
WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [2,] +succeeded [0,1,] 
+---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,2,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,2,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +[timestamp] [2,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000001 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap new file mode 100644 index 000000000..5dcb5a4f7 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ -0,0 +1,45 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: 
NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: 
Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,1,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1, "doggo": 1, "id": 1} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap 
b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap new file mode 100644 index 000000000..80521df42 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -0,0 +1,44 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [1,] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"documentAdditionOrUpdate" [1,] +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,1,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 0, field_distribution: {} } + 
+---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +[timestamp] [1,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: +00000000-0000-0000-0000-000000000000 + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap new file mode 100644 index 000000000..97b669f44 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -0,0 +1,36 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: 
NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: 
+enqueued [0,] +---------------------------------------------------------------------- +### Kind: +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +---------------------------------------------------------------------- +### Finished At: +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap new file mode 100644 index 000000000..f3ce4b104 --- /dev/null +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -0,0 +1,40 @@ +--- +source: index-scheduler/src/lib.rs +--- +### Autobatching Enabled = true +### Processing Tasks: +[] +---------------------------------------------------------------------- +### All Tasks: +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: 
NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, 
distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +---------------------------------------------------------------------- +### Status: +enqueued [] +succeeded [0,] +---------------------------------------------------------------------- +### Kind: +"settingsUpdate" [0,] +---------------------------------------------------------------------- +### Index Tasks: +doggos [0,] +---------------------------------------------------------------------- +### Index Mapper: +doggos: { number_of_documents: 0, field_distribution: {} } + +---------------------------------------------------------------------- +### Canceled By: + +---------------------------------------------------------------------- +### Enqueued At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Started At: +[timestamp] [0,] +---------------------------------------------------------------------- +### Finished At: +[timestamp] [0,] +---------------------------------------------------------------------- +### File Store: + +---------------------------------------------------------------------- + From 7e251b43d41b4f44ff597a02ea1d0f759c42989e Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 20 May 2024 15:09:45 +0200 Subject: [PATCH 39/56] Revert "Stream documents" --- Cargo.lock | 38 +++--- index-scheduler/src/batch.rs | 10 +- index-scheduler/src/lib.rs | 14 +-- meilisearch-auth/src/store.rs | 2 +- meilisearch-types/src/error.rs | 1 + meilisearch/Cargo.toml | 1 - meilisearch/src/routes/indexes/documents.rs | 116 +++++------------- meilisearch/src/routes/mod.rs | 28 ++--- meilitool/src/main.rs | 8 +- milli/Cargo.toml | 4 +- milli/fuzz/.gitignore | 3 - milli/src/error.rs | 3 + milli/src/index.rs | 7 +- milli/src/update/facet/mod.rs | 2 +- milli/src/update/index_documents/mod.rs | 2 +- .../src/update/index_documents/typed_chunk.rs | 3 +- 16 files changed, 91 insertions(+), 151 deletions(-) delete mode 
100644 milli/fuzz/.gitignore diff --git a/Cargo.lock b/Cargo.lock index d9e96b029..937fce64a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -378,9 +378,9 @@ dependencies = [ [[package]] name = "arroy" -version = "0.3.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9" +checksum = "efddeb1e7c32a551cc07ef4c3e181e3cd5478fdaf4f0bd799983171c1f6efe57" dependencies = [ "bytemuck", "byteorder", @@ -1536,9 +1536,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "doxygen-rs" -version = "0.4.2" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" +checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505" dependencies = [ "phf", ] @@ -2262,11 +2262,12 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" -version = "0.20.1" +version = "0.20.0-alpha.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd" +checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934" dependencies = [ "bitflags 2.5.0", + "bytemuck", "byteorder", "heed-traits", "heed-types", @@ -2280,15 +2281,15 @@ dependencies = [ [[package]] name = "heed-traits" -version = "0.20.0" +version = "0.20.0-alpha.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff" +checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298" [[package]] name = "heed-types" -version = "0.20.0" +version = "0.20.0-alpha.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cb0d6ba3700c9a57e83c013693e3eddb68a6d9b6781cacafc62a0d992e8ddb3" +checksum = 
"f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a" dependencies = [ "bincode", "byteorder", @@ -3188,13 +3189,14 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da" [[package]] name = "lmdb-master-sys" -version = "0.2.0" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a" +checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd" dependencies = [ "cc", "doxygen-rs", "libc", + "pkg-config", ] [[package]] @@ -3346,7 +3348,6 @@ dependencies = [ "rayon", "regex", "reqwest", - "roaring", "rustls 0.21.12", "rustls-pemfile", "segment", @@ -4415,6 +4416,12 @@ dependencies = [ "winreg", ] +[[package]] +name = "retain_mut" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" + [[package]] name = "ring" version = "0.17.8" @@ -4432,12 +4439,13 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.3" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1c77081a55300e016cb86f2864415b7518741879db925b8d488a0ee0d2da6bf" +checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873" dependencies = [ "bytemuck", "byteorder", + "retain_mut", "serde", ] diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 582497c15..bc9823a01 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -785,12 +785,10 @@ impl IndexScheduler { let dst = temp_snapshot_dir.path().join("auth"); fs::create_dir_all(&dst)?; // TODO We can't use the open_auth_store_env function here but we should - let auth = unsafe { - milli::heed::EnvOpenOptions::new() - .map_size(1024 * 1024 * 1024) // 1 GiB - .max_dbs(2) - .open(&self.auth_path) - }?; + let auth = milli::heed::EnvOpenOptions::new() + .map_size(1024 * 1024 * 
1024) // 1 GiB + .max_dbs(2) + .open(&self.auth_path)?; auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; // 5. Copy and tarball the flat snapshot diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index dd2b296f6..5704f5354 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -453,12 +453,10 @@ impl IndexScheduler { ) }; - let env = unsafe { - heed::EnvOpenOptions::new() - .max_dbs(11) - .map_size(budget.task_db_size) - .open(options.tasks_path) - }?; + let env = heed::EnvOpenOptions::new() + .max_dbs(11) + .map_size(budget.task_db_size) + .open(options.tasks_path)?; let features = features::FeatureData::new(&env, options.instance_features)?; @@ -587,9 +585,9 @@ impl IndexScheduler { } fn is_good_heed(tasks_path: &Path, map_size: usize) -> bool { - if let Ok(env) = unsafe { + if let Ok(env) = heed::EnvOpenOptions::new().map_size(clamp_to_page_size(map_size)).open(tasks_path) - } { + { env.prepare_for_closing().wait(); true } else { diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index ef992e836..1eebd3fe9 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -49,7 +49,7 @@ pub fn open_auth_store_env(path: &Path) -> milli::heed::Result let mut options = EnvOpenOptions::new(); options.map_size(AUTH_STORE_SIZE); // 1GB options.max_dbs(2); - unsafe { options.open(path) } + options.open(path) } impl HeedAuthStore { diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 158dfae92..eea012331 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -423,6 +423,7 @@ impl ErrorCode for HeedError { HeedError::Mdb(_) | HeedError::Encoding(_) | HeedError::Decoding(_) + | HeedError::InvalidDatabaseTyping | HeedError::DatabaseClosing | HeedError::BadOpenOptions { .. 
} => Code::Internal, } diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 612c6731b..ed62c5f48 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -108,7 +108,6 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] } tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-actix-web = "0.7.9" build-info = { version = "1.7.0", path = "../build-info" } -roaring = "0.10.3" [dev-dependencies] actix-rt = "2.9.0" diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index 7c9b4b761..43fab1dae 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -1,14 +1,12 @@ -use std::io::{ErrorKind, Write}; +use std::io::ErrorKind; use actix_web::http::header::CONTENT_TYPE; use actix_web::web::Data; use actix_web::{web, HttpMessage, HttpRequest, HttpResponse}; use bstr::ByteSlice as _; -use bytes::Bytes; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; -use futures_util::Stream; use index_scheduler::{IndexScheduler, TaskId}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; @@ -24,9 +22,7 @@ use meilisearch_types::tasks::KindWithContent; use meilisearch_types::{milli, Document, Index}; use mime::Mime; use once_cell::sync::Lazy; -use roaring::RoaringBitmap; -use serde::ser::SerializeSeq; -use serde::{Deserialize, Serialize}; +use serde::Deserialize; use serde_json::Value; use tempfile::tempfile; use tokio::fs::File; @@ -234,34 +230,6 @@ pub async fn get_documents( documents_by_query(&index_scheduler, index_uid, query) } -pub struct Writer2Streamer { - sender: tokio::sync::mpsc::Sender>, -} - -impl Write for Writer2Streamer { - fn write(&mut self, buf: &[u8]) -> std::io::Result { - self.sender.blocking_send(Ok(buf.to_vec().into())).map_err(std::io::Error::other)?; - Ok(buf.len()) - } - - fn flush(&mut 
self) -> std::io::Result<()> { - Ok(()) - } -} - -pub fn stream( - data: impl Serialize + Send + 'static, -) -> impl Stream> { - let (sender, receiver) = tokio::sync::mpsc::channel::>(1); - - tokio::task::spawn_blocking(move || { - serde_json::to_writer(std::io::BufWriter::new(Writer2Streamer { sender }), &data) - }); - futures_util::stream::unfold(receiver, |mut receiver| async { - receiver.recv().await.map(|value| (value, receiver)) - }) -} - fn documents_by_query( index_scheduler: &IndexScheduler, index_uid: web::Path, @@ -271,13 +239,12 @@ fn documents_by_query( let BrowseQuery { offset, limit, fields, filter } = query; let index = index_scheduler.index(&index_uid)?; - let documents = retrieve_documents(index, offset, limit, filter, fields)?; + let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?; - let ret = PaginationView::new(offset, limit, documents.total_documents as usize, documents); + let ret = PaginationView::new(offset, limit, total as usize, documents); debug!(returns = ?ret, "Get documents"); - - Ok(HttpResponse::Ok().streaming(stream(ret))) + Ok(HttpResponse::Ok().json(ret)) } #[derive(Deserialize, Debug, Deserr)] @@ -623,47 +590,14 @@ fn some_documents<'a, 't: 'a>( })) } -pub struct DocumentsStreamer { - attributes_to_retrieve: Option>, - documents: RoaringBitmap, - rtxn: RoTxn<'static>, - index: Index, - pub total_documents: u64, -} - -impl Serialize for DocumentsStreamer { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - let mut seq = serializer.serialize_seq(Some(self.documents.len() as usize)).unwrap(); - - let documents = some_documents(&self.index, &self.rtxn, self.documents.iter()).unwrap(); - for document in documents { - let document = document.unwrap(); - let document = match self.attributes_to_retrieve { - Some(ref attributes_to_retrieve) => permissive_json_pointer::select_values( - &document, - attributes_to_retrieve.iter().map(|s| s.as_ref()), - ), - None => document, 
- }; - - seq.serialize_element(&document)?; - } - seq.end() - } -} - -fn retrieve_documents( - index: Index, +fn retrieve_documents>( + index: &Index, offset: usize, limit: usize, filter: Option, - attributes_to_retrieve: Option>, -) -> Result { - let rtxn = index.static_read_txn()?; - + attributes_to_retrieve: Option>, +) -> Result<(u64, Vec), ResponseError> { + let rtxn = index.read_txn()?; let filter = &filter; let filter = if let Some(filter) = filter { parse_filter(filter) @@ -673,7 +607,7 @@ fn retrieve_documents( }; let candidates = if let Some(filter) = filter { - filter.evaluate(&rtxn, &index).map_err(|err| match err { + filter.evaluate(&rtxn, index).map_err(|err| match err { milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter) } @@ -683,13 +617,27 @@ fn retrieve_documents( index.documents_ids(&rtxn)? }; - Ok(DocumentsStreamer { - total_documents: candidates.len(), - attributes_to_retrieve, - documents: candidates.into_iter().skip(offset).take(limit).collect(), - rtxn, - index, - }) + let (it, number_of_documents) = { + let number_of_documents = candidates.len(); + ( + some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?, + number_of_documents, + ) + }; + + let documents: Result, ResponseError> = it + .map(|document| { + Ok(match &attributes_to_retrieve { + Some(attributes_to_retrieve) => permissive_json_pointer::select_values( + &document?, + attributes_to_retrieve.iter().map(|s| s.as_ref()), + ), + None => document?, + }) + }) + .collect(); + + Ok((number_of_documents, documents?)) } fn retrieve_document>( diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs index a7e84d19c..c25aeee70 100644 --- a/meilisearch/src/routes/mod.rs +++ b/meilisearch/src/routes/mod.rs @@ -1,5 +1,4 @@ use std::collections::BTreeMap; -use std::fmt; use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; @@ -125,31 +124,20 @@ pub 
struct Pagination { pub limit: usize, } -#[derive(Clone, Serialize)] -pub struct PaginationView { - pub results: T, +#[derive(Debug, Clone, Serialize)] +pub struct PaginationView { + pub results: Vec, pub offset: usize, pub limit: usize, pub total: usize, } -impl fmt::Debug for PaginationView { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("PaginationView") - .field("offset", &self.offset) - .field("limit", &self.limit) - .field("total", &self.total) - .field("results", &"[...]") - .finish() - } -} - impl Pagination { /// Given the full data to paginate, returns the selected section. pub fn auto_paginate_sized( self, content: impl IntoIterator + ExactSizeIterator, - ) -> PaginationView> + ) -> PaginationView where T: Serialize, { @@ -163,7 +151,7 @@ impl Pagination { self, total: usize, content: impl IntoIterator, - ) -> PaginationView> + ) -> PaginationView where T: Serialize, { @@ -173,7 +161,7 @@ impl Pagination { /// Given the data already paginated + the total number of elements, it stores /// everything in a [PaginationResult]. - pub fn format_with(self, total: usize, results: Vec) -> PaginationView> + pub fn format_with(self, total: usize, results: Vec) -> PaginationView where T: Serialize, { @@ -181,8 +169,8 @@ impl Pagination { } } -impl PaginationView { - pub fn new(offset: usize, limit: usize, total: usize, results: T) -> Self { +impl PaginationView { + pub fn new(offset: usize, limit: usize, total: usize, results: Vec) -> Self { Self { offset, limit, results, total } } } diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index 06c4890a5..bfcbfdd6d 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -80,7 +80,9 @@ fn main() -> anyhow::Result<()> { /// Clears the task queue located at `db_path`. 
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { let path = db_path.join("tasks"); - let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) } + let env = EnvOpenOptions::new() + .max_dbs(100) + .open(&path) .with_context(|| format!("While trying to open {:?}", path.display()))?; eprintln!("Deleting tasks from the database..."); @@ -191,7 +193,9 @@ fn export_a_dump( FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; let index_scheduler_path = db_path.join("tasks"); - let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } + let env = EnvOpenOptions::new() + .max_dbs(100) + .open(&index_scheduler_path) .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; eprintln!("Dumping the keys..."); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index c5dddd0fd..7d903178b 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -30,7 +30,7 @@ grenad = { version = "0.4.6", default-features = false, features = [ "rayon", "tempfile", ] } -heed = { version = "0.20.1", default-features = false, features = [ +heed = { version = "0.20.0-alpha.9", default-features = false, features = [ "serde-json", "serde-bincode", "read-txn-no-tls", @@ -82,7 +82,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", ] } tiktoken-rs = "0.5.8" liquid = "0.26.4" -arroy = "0.3.1" +arroy = "0.2.0" rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.9.7", features = ["json"] } diff --git a/milli/fuzz/.gitignore b/milli/fuzz/.gitignore deleted file mode 100644 index a0925114d..000000000 --- a/milli/fuzz/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -target -corpus -artifacts diff --git a/milli/src/error.rs b/milli/src/error.rs index 6db0dcac1..009781fcf 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -48,6 +48,8 @@ pub enum InternalError { GrenadInvalidFormatVersion, #[error("Invalid merge while processing {process}")] IndexingMergingKeys { 
process: &'static str }, + #[error("{}", HeedError::InvalidDatabaseTyping)] + InvalidDatabaseTyping, #[error(transparent)] RayonThreadPool(#[from] ThreadPoolBuildError), #[error(transparent)] @@ -427,6 +429,7 @@ impl From for Error { // TODO use the encoding HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })), HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })), + HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), HeedError::DatabaseClosing => InternalError(DatabaseClosing), HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions), } diff --git a/milli/src/index.rs b/milli/src/index.rs index 739a7f202..42b9cb111 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -184,7 +184,7 @@ impl Index { options.max_dbs(25); - let env = unsafe { options.open(path) }?; + let env = options.open(path)?; let mut wtxn = env.write_txn()?; let main = env.database_options().name(MAIN).create(&mut wtxn)?; let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; @@ -294,11 +294,6 @@ impl Index { self.env.read_txn() } - /// Create a static read transaction to be able to read the index without keeping a reference to it. - pub fn static_read_txn(&self) -> heed::Result> { - self.env.clone().static_read_txn() - } - /// Returns the canonicalized path where the heed `Env` of this `Index` lives. 
pub fn path(&self) -> &Path { self.env.path() diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 42994551f..0af64c4c5 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -379,7 +379,7 @@ pub(crate) mod test_helpers { let mut options = heed::EnvOpenOptions::new(); let options = options.map_size(4096 * 4 * 1000 * 100); let tempdir = tempfile::TempDir::new().unwrap(); - let env = unsafe { options.open(tempdir.path()) }.unwrap(); + let env = options.open(tempdir.path()).unwrap(); let mut wtxn = env.write_txn().unwrap(); let content = env.create_database(&mut wtxn, None).unwrap(); wtxn.commit().unwrap(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 4d2fac7cb..936ce1efc 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -556,7 +556,7 @@ where let writer_index = (embedder_index as u16) << 8; for k in 0..=u8::MAX { let writer = - arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension); + arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension)?; if writer.is_empty(wtxn)? 
{ break; } diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index e0de2d5a1..6aad290e5 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -661,7 +661,7 @@ pub(crate) fn write_typed_chunk_into_index( )?; let writer_index = (embedder_index as u16) << 8; // FIXME: allow customizing distance - let writers: Vec<_> = (0..=u8::MAX) + let writers: std::result::Result, _> = (0..=u8::MAX) .map(|k| { arroy::Writer::new( index.vector_arroy, @@ -670,6 +670,7 @@ pub(crate) fn write_typed_chunk_into_index( ) }) .collect(); + let writers = writers?; // remove vectors for docids we want them removed let merger = remove_vectors_builder.build(); From c9ac7f2e7e02ac927bba70b7cfeaa020f3e60534 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 16 May 2024 16:10:55 +0200 Subject: [PATCH 40/56] update heed to latest version --- Cargo.lock | 26 +++++++++---------- index-scheduler/src/batch.rs | 10 ++++--- index-scheduler/src/lib.rs | 14 +++++----- meilisearch-auth/src/store.rs | 2 +- meilisearch-types/src/error.rs | 1 - meilitool/src/main.rs | 8 ++---- milli/Cargo.toml | 4 +-- milli/fuzz/.gitignore | 3 +++ milli/src/error.rs | 3 --- milli/src/index.rs | 7 ++++- milli/src/update/facet/mod.rs | 2 +- milli/src/update/index_documents/mod.rs | 2 +- .../src/update/index_documents/typed_chunk.rs | 3 +-- 13 files changed, 43 insertions(+), 42 deletions(-) create mode 100644 milli/fuzz/.gitignore diff --git a/Cargo.lock b/Cargo.lock index 937fce64a..156917462 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -378,9 +378,9 @@ dependencies = [ [[package]] name = "arroy" -version = "0.2.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efddeb1e7c32a551cc07ef4c3e181e3cd5478fdaf4f0bd799983171c1f6efe57" +checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9" dependencies = [ "bytemuck", "byteorder", @@ -1536,9 
+1536,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "doxygen-rs" -version = "0.2.2" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff670ea0c9bbb8414e7efa6e23ebde2b8f520a7eef78273a3918cf1903e7505" +checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" dependencies = [ "phf", ] @@ -2262,12 +2262,11 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "heed" -version = "0.20.0-alpha.9" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9648a50991c86df7d00c56c268c27754fcf4c80be2ba57fc4a00dc928c6fe934" +checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd" dependencies = [ "bitflags 2.5.0", - "bytemuck", "byteorder", "heed-traits", "heed-types", @@ -2281,15 +2280,15 @@ dependencies = [ [[package]] name = "heed-traits" -version = "0.20.0-alpha.9" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab0b7d9cde969ad36dde692e487dc89d97f7168bf6a7bd3b894ad4bf7278298" +checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff" [[package]] name = "heed-types" -version = "0.20.0-alpha.9" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0cb3567a7363f28b597bf6e9897b9466397951dd0e52df2c8196dd8a71af44a" +checksum = "3cb0d6ba3700c9a57e83c013693e3eddb68a6d9b6781cacafc62a0d992e8ddb3" dependencies = [ "bincode", "byteorder", @@ -3189,14 +3188,13 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da" [[package]] name = "lmdb-master-sys" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629c123f5321b48fa4f8f4d3b868165b748d9ba79c7103fb58e3a94f736bcedd" +checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a" dependencies 
= [ "cc", "doxygen-rs", "libc", - "pkg-config", ] [[package]] diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index bc9823a01..582497c15 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -785,10 +785,12 @@ impl IndexScheduler { let dst = temp_snapshot_dir.path().join("auth"); fs::create_dir_all(&dst)?; // TODO We can't use the open_auth_store_env function here but we should - let auth = milli::heed::EnvOpenOptions::new() - .map_size(1024 * 1024 * 1024) // 1 GiB - .max_dbs(2) - .open(&self.auth_path)?; + let auth = unsafe { + milli::heed::EnvOpenOptions::new() + .map_size(1024 * 1024 * 1024) // 1 GiB + .max_dbs(2) + .open(&self.auth_path) + }?; auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; // 5. Copy and tarball the flat snapshot diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index 5704f5354..dd2b296f6 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -453,10 +453,12 @@ impl IndexScheduler { ) }; - let env = heed::EnvOpenOptions::new() - .max_dbs(11) - .map_size(budget.task_db_size) - .open(options.tasks_path)?; + let env = unsafe { + heed::EnvOpenOptions::new() + .max_dbs(11) + .map_size(budget.task_db_size) + .open(options.tasks_path) + }?; let features = features::FeatureData::new(&env, options.instance_features)?; @@ -585,9 +587,9 @@ impl IndexScheduler { } fn is_good_heed(tasks_path: &Path, map_size: usize) -> bool { - if let Ok(env) = + if let Ok(env) = unsafe { heed::EnvOpenOptions::new().map_size(clamp_to_page_size(map_size)).open(tasks_path) - { + } { env.prepare_for_closing().wait(); true } else { diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs index 1eebd3fe9..ef992e836 100644 --- a/meilisearch-auth/src/store.rs +++ b/meilisearch-auth/src/store.rs @@ -49,7 +49,7 @@ pub fn open_auth_store_env(path: &Path) -> milli::heed::Result let mut options = EnvOpenOptions::new(); options.map_size(AUTH_STORE_SIZE); // 
1GB options.max_dbs(2); - options.open(path) + unsafe { options.open(path) } } impl HeedAuthStore { diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index eea012331..158dfae92 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -423,7 +423,6 @@ impl ErrorCode for HeedError { HeedError::Mdb(_) | HeedError::Encoding(_) | HeedError::Decoding(_) - | HeedError::InvalidDatabaseTyping | HeedError::DatabaseClosing | HeedError::BadOpenOptions { .. } => Code::Internal, } diff --git a/meilitool/src/main.rs b/meilitool/src/main.rs index bfcbfdd6d..06c4890a5 100644 --- a/meilitool/src/main.rs +++ b/meilitool/src/main.rs @@ -80,9 +80,7 @@ fn main() -> anyhow::Result<()> { /// Clears the task queue located at `db_path`. fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> { let path = db_path.join("tasks"); - let env = EnvOpenOptions::new() - .max_dbs(100) - .open(&path) + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&path) } .with_context(|| format!("While trying to open {:?}", path.display()))?; eprintln!("Deleting tasks from the database..."); @@ -193,9 +191,7 @@ fn export_a_dump( FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?; let index_scheduler_path = db_path.join("tasks"); - let env = EnvOpenOptions::new() - .max_dbs(100) - .open(&index_scheduler_path) + let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) } .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?; eprintln!("Dumping the keys..."); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 7d903178b..c5dddd0fd 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -30,7 +30,7 @@ grenad = { version = "0.4.6", default-features = false, features = [ "rayon", "tempfile", ] } -heed = { version = "0.20.0-alpha.9", default-features = false, features = [ +heed = { version = "0.20.1", default-features = false, features = [ "serde-json", 
"serde-bincode", "read-txn-no-tls", @@ -82,7 +82,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", ] } tiktoken-rs = "0.5.8" liquid = "0.26.4" -arroy = "0.2.0" +arroy = "0.3.1" rand = "0.8.5" tracing = "0.1.40" ureq = { version = "2.9.7", features = ["json"] } diff --git a/milli/fuzz/.gitignore b/milli/fuzz/.gitignore new file mode 100644 index 000000000..a0925114d --- /dev/null +++ b/milli/fuzz/.gitignore @@ -0,0 +1,3 @@ +target +corpus +artifacts diff --git a/milli/src/error.rs b/milli/src/error.rs index 009781fcf..6db0dcac1 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -48,8 +48,6 @@ pub enum InternalError { GrenadInvalidFormatVersion, #[error("Invalid merge while processing {process}")] IndexingMergingKeys { process: &'static str }, - #[error("{}", HeedError::InvalidDatabaseTyping)] - InvalidDatabaseTyping, #[error(transparent)] RayonThreadPool(#[from] ThreadPoolBuildError), #[error(transparent)] @@ -429,7 +427,6 @@ impl From for Error { // TODO use the encoding HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })), HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })), - HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping), HeedError::DatabaseClosing => InternalError(DatabaseClosing), HeedError::BadOpenOptions { .. 
} => UserError(InvalidLmdbOpenOptions), } diff --git a/milli/src/index.rs b/milli/src/index.rs index 42b9cb111..739a7f202 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -184,7 +184,7 @@ impl Index { options.max_dbs(25); - let env = options.open(path)?; + let env = unsafe { options.open(path) }?; let mut wtxn = env.write_txn()?; let main = env.database_options().name(MAIN).create(&mut wtxn)?; let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?; @@ -294,6 +294,11 @@ impl Index { self.env.read_txn() } + /// Create a static read transaction to be able to read the index without keeping a reference to it. + pub fn static_read_txn(&self) -> heed::Result> { + self.env.clone().static_read_txn() + } + /// Returns the canonicalized path where the heed `Env` of this `Index` lives. pub fn path(&self) -> &Path { self.env.path() diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 0af64c4c5..42994551f 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -379,7 +379,7 @@ pub(crate) mod test_helpers { let mut options = heed::EnvOpenOptions::new(); let options = options.map_size(4096 * 4 * 1000 * 100); let tempdir = tempfile::TempDir::new().unwrap(); - let env = options.open(tempdir.path()).unwrap(); + let env = unsafe { options.open(tempdir.path()) }.unwrap(); let mut wtxn = env.write_txn().unwrap(); let content = env.create_database(&mut wtxn, None).unwrap(); wtxn.commit().unwrap(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 936ce1efc..4d2fac7cb 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -556,7 +556,7 @@ where let writer_index = (embedder_index as u16) << 8; for k in 0..=u8::MAX { let writer = - arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension)?; + arroy::Writer::new(vector_arroy, writer_index | (k as u16), dimension); if writer.is_empty(wtxn)? 
{ break; } diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 6aad290e5..e0de2d5a1 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -661,7 +661,7 @@ pub(crate) fn write_typed_chunk_into_index( )?; let writer_index = (embedder_index as u16) << 8; // FIXME: allow customizing distance - let writers: std::result::Result, _> = (0..=u8::MAX) + let writers: Vec<_> = (0..=u8::MAX) .map(|k| { arroy::Writer::new( index.vector_arroy, @@ -670,7 +670,6 @@ pub(crate) fn write_typed_chunk_into_index( ) }) .collect(); - let writers = writers?; // remove vectors for docids we want them removed let merger = remove_vectors_builder.build(); From 1aa8ed9ef7bc02fe805e77e2feee4b81031acb05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 21 May 2024 14:53:26 +0200 Subject: [PATCH 41/56] Make the original sorter optional --- milli/src/update/index_documents/mod.rs | 22 +++++++++-- milli/src/update/index_documents/transform.rs | 38 ++++++++++++------- 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 4d2fac7cb..cceb25338 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -6,6 +6,7 @@ mod typed_chunk; use std::collections::{HashMap, HashSet}; use std::io::{Read, Seek}; +use std::iter; use std::num::NonZeroU32; use std::result::Result as StdResult; use std::sync::Arc; @@ -373,8 +374,11 @@ where } }; - let original_documents = grenad::Reader::new(original_documents)?; let flattened_documents = grenad::Reader::new(flattened_documents)?; + let original_documents = match original_documents { + Some(original_documents) => Some(grenad::Reader::new(original_documents)?), + None => None, + }; let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes; @@ -393,11 
+397,21 @@ where pool.install(|| { rayon::spawn(move || { let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "extract_and_send_grenad_chunks"); - let _enter = child_span.enter(); - puffin::profile_scope!("extract_and_send_grenad_chunks"); + let _enter = child_span.enter(); + puffin::profile_scope!("extract_and_send_grenad_chunks"); // split obkv file into several chunks let original_chunk_iter = - grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size); + match original_documents { + Some(original_documents) => { + grenad_obkv_into_chunks( + original_documents, + pool_params, + documents_chunk_size + ) + .map(either::Either::Left) + }, + None => Ok(either::Right(iter::empty())), + }; // split obkv file into several chunks let flattened_chunk_iter = diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 8a3463e6f..f7e3d79fd 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -33,7 +33,7 @@ pub struct TransformOutput { pub settings_diff: InnerIndexSettingsDiff, pub field_distribution: FieldDistribution, pub documents_count: usize, - pub original_documents: File, + pub original_documents: Option, pub flattened_documents: File, } @@ -822,7 +822,9 @@ impl<'a, 'i> Transform<'a, 'i> { settings_diff, field_distribution, documents_count: self.documents_count, - original_documents: original_documents.into_inner().map_err(|err| err.into_error())?, + original_documents: Some( + original_documents.into_inner().map_err(|err| err.into_error())?, + ), flattened_documents: flattened_documents .into_inner() .map_err(|err| err.into_error())?, @@ -891,14 +893,18 @@ impl<'a, 'i> Transform<'a, 'i> { let documents_count = documents_ids.len() as usize; // We initialize the sorter with the user indexing settings. 
- let mut original_sorter = create_sorter( - grenad::SortAlgorithm::Stable, - keep_first, - self.indexer_settings.chunk_compression_type, - self.indexer_settings.chunk_compression_level, - self.indexer_settings.max_nb_chunks, - self.indexer_settings.max_memory.map(|mem| mem / 2), - ); + let mut original_sorter = if settings_diff.reindex_vectors() { + Some(create_sorter( + grenad::SortAlgorithm::Stable, + keep_first, + self.indexer_settings.chunk_compression_type, + self.indexer_settings.chunk_compression_level, + self.indexer_settings.max_nb_chunks, + self.indexer_settings.max_memory.map(|mem| mem / 2), + )) + } else { + None + }; // We initialize the sorter with the user indexing settings. let mut flattened_sorter = create_sorter( @@ -929,7 +935,9 @@ impl<'a, 'i> Transform<'a, 'i> { document_sorter_key_buffer.clear(); document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes()); document_sorter_key_buffer.extend_from_slice(external_id.as_bytes()); - original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?; + if let Some(original_sorter) = original_sorter.as_mut() { + original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?; + } flattened_sorter.insert(docid.to_be_bytes(), &flattened_obkv_buffer)?; } @@ -941,16 +949,18 @@ impl<'a, 'i> Transform<'a, 'i> { }; // Once we have written all the documents, we merge everything into a Reader. 
- let original_documents = sorter_into_reader(original_sorter, grenad_params)?; - let flattened_documents = sorter_into_reader(flattened_sorter, grenad_params)?; + let original_documents = match original_sorter { + Some(original_sorter) => Some(sorter_into_reader(original_sorter, grenad_params)?), + None => None, + }; Ok(TransformOutput { primary_key, field_distribution, settings_diff, documents_count, - original_documents: original_documents.into_inner().into_inner(), + original_documents: original_documents.map(|od| od.into_inner().into_inner()), flattened_documents: flattened_documents.into_inner().into_inner(), }) } From 943f8dba0c97bd96f1a325abfbf5d76833e35c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 21 May 2024 14:58:36 +0200 Subject: [PATCH 42/56] Make clippy happy --- .../src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index dcab42c0a..1db518c7d 100644 --- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -195,7 +195,7 @@ mod tests { fn merge_cbo_roaring_bitmaps() { let mut buffer = Vec::new(); - let small_data = vec![ + let small_data = [ RoaringBitmap::from_sorted_iter(1..4).unwrap(), RoaringBitmap::from_sorted_iter(2..5).unwrap(), RoaringBitmap::from_sorted_iter(4..6).unwrap(), @@ -209,7 +209,7 @@ mod tests { let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap(); assert_eq!(bitmap, expected); - let medium_data = vec![ + let medium_data = [ RoaringBitmap::from_sorted_iter(1..4).unwrap(), RoaringBitmap::from_sorted_iter(2..5).unwrap(), RoaringBitmap::from_sorted_iter(4..8).unwrap(), From eccbcf51300277a81eeb43678855bbe1299a65e2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 21 May 2024 14:59:08 +0200 Subject: 
[PATCH 43/56] Increase index-scheduler test timeouts --- index-scheduler/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index f743422a7..e4c9cd08f 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -1852,7 +1852,7 @@ mod tests { // To be 100% consistent between all test we're going to start the scheduler right now // and ensure it's in the expected starting state. - let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(1)) { + let breakpoint = match receiver.recv_timeout(std::time::Duration::from_secs(10)) { Ok(b) => b, Err(RecvTimeoutError::Timeout) => { panic!("The scheduler seems to be waiting for a new task while your test is waiting for a breakpoint.") @@ -1963,7 +1963,7 @@ mod tests { fn advance(&mut self) -> Breakpoint { let (breakpoint_1, b) = match self .test_breakpoint_rcv - .recv_timeout(std::time::Duration::from_secs(5)) + .recv_timeout(std::time::Duration::from_secs(50)) { Ok(b) => b, Err(RecvTimeoutError::Timeout) => { @@ -1984,7 +1984,7 @@ mod tests { let (breakpoint_2, b) = match self .test_breakpoint_rcv - .recv_timeout(std::time::Duration::from_secs(5)) + .recv_timeout(std::time::Duration::from_secs(50)) { Ok(b) => b, Err(RecvTimeoutError::Timeout) => { From 500ddc76b549fb9f1af54b2dd6abfa15960381bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 21 May 2024 16:16:36 +0200 Subject: [PATCH 44/56] Make the flattened sorter optional --- milli/src/update/index_documents/mod.rs | 36 +++++++++------- milli/src/update/index_documents/transform.rs | 43 ++++++++++++------- milli/src/update/settings.rs | 1 + 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index cceb25338..dccfbe795 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -360,7 +360,10 
@@ where let min_chunk_size = 1024 * 512; // 512KiB // compute the chunk size from the number of available threads and the inputed data size. - let total_size = flattened_documents.metadata().map(|m| m.len()); + let total_size = match flattened_documents.as_ref() { + Some(flattened_documents) => flattened_documents.metadata().map(|m| m.len()), + None => Ok(default_chunk_size as u64), + }; let current_num_threads = pool.current_num_threads(); // if we have more than 2 thread, create a number of chunk equal to 3/4 threads count let chunk_count = if current_num_threads > 2 { @@ -374,11 +377,14 @@ where } }; - let flattened_documents = grenad::Reader::new(flattened_documents)?; let original_documents = match original_documents { Some(original_documents) => Some(grenad::Reader::new(original_documents)?), None => None, }; + let flattened_documents = match flattened_documents { + Some(flattened_documents) => Some(grenad::Reader::new(flattened_documents)?), + None => None, + }; let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes; @@ -400,22 +406,20 @@ where let _enter = child_span.enter(); puffin::profile_scope!("extract_and_send_grenad_chunks"); // split obkv file into several chunks - let original_chunk_iter = - match original_documents { - Some(original_documents) => { - grenad_obkv_into_chunks( - original_documents, - pool_params, - documents_chunk_size - ) - .map(either::Either::Left) - }, - None => Ok(either::Right(iter::empty())), - }; + let original_chunk_iter = match original_documents { + Some(original_documents) => { + grenad_obkv_into_chunks(original_documents,pool_params,documents_chunk_size).map(either::Left) + }, + None => Ok(either::Right(iter::empty())), + }; // split obkv file into several chunks - let flattened_chunk_iter = - grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size); + let flattened_chunk_iter = match flattened_documents { + Some(flattened_documents) => { + 
grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size).map(either::Left) + }, + None => Ok(either::Right(iter::empty())), + }; let result = original_chunk_iter.and_then(|original_chunk| { let flattened_chunk = flattened_chunk_iter?; diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index f7e3d79fd..8bedd778e 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -34,7 +34,7 @@ pub struct TransformOutput { pub field_distribution: FieldDistribution, pub documents_count: usize, pub original_documents: Option, - pub flattened_documents: File, + pub flattened_documents: Option, } /// Extract the external ids, deduplicate and compute the new internal documents ids @@ -825,9 +825,9 @@ impl<'a, 'i> Transform<'a, 'i> { original_documents: Some( original_documents.into_inner().map_err(|err| err.into_error())?, ), - flattened_documents: flattened_documents - .into_inner() - .map_err(|err| err.into_error())?, + flattened_documents: Some( + flattened_documents.into_inner().map_err(|err| err.into_error())?, + ), }) } @@ -840,6 +840,9 @@ impl<'a, 'i> Transform<'a, 'i> { original_obkv_buffer: &mut Vec, flattened_obkv_buffer: &mut Vec, ) -> Result<()> { + /// TODO do a XOR of the faceted fields + /// TODO if reindex_searchable returns true store all searchables else none + /// TODO no longer useful after Tamo's PR let mut old_fields_ids_map = settings_diff.old.fields_ids_map.clone(); let mut new_fields_ids_map = settings_diff.new.fields_ids_map.clone(); let mut obkv_writer = KvWriter::<_, FieldId>::memory(); @@ -907,14 +910,19 @@ impl<'a, 'i> Transform<'a, 'i> { }; // We initialize the sorter with the user indexing settings. 
- let mut flattened_sorter = create_sorter( - grenad::SortAlgorithm::Stable, - keep_first, - self.indexer_settings.chunk_compression_type, - self.indexer_settings.chunk_compression_level, - self.indexer_settings.max_nb_chunks, - self.indexer_settings.max_memory.map(|mem| mem / 2), - ); + let mut flattened_sorter = + if settings_diff.reindex_searchable() || settings_diff.reindex_facets() { + Some(create_sorter( + grenad::SortAlgorithm::Stable, + keep_first, + self.indexer_settings.chunk_compression_type, + self.indexer_settings.chunk_compression_level, + self.indexer_settings.max_nb_chunks, + self.indexer_settings.max_memory.map(|mem| mem / 2), + )) + } else { + None + }; let mut original_obkv_buffer = Vec::new(); let mut flattened_obkv_buffer = Vec::new(); @@ -938,7 +946,9 @@ impl<'a, 'i> Transform<'a, 'i> { if let Some(original_sorter) = original_sorter.as_mut() { original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?; } - flattened_sorter.insert(docid.to_be_bytes(), &flattened_obkv_buffer)?; + if let Some(flattened_sorter) = flattened_sorter.as_mut() { + flattened_sorter.insert(docid.to_be_bytes(), &flattened_obkv_buffer)?; + } } let grenad_params = GrenadParameters { @@ -949,7 +959,10 @@ impl<'a, 'i> Transform<'a, 'i> { }; // Once we have written all the documents, we merge everything into a Reader. 
- let flattened_documents = sorter_into_reader(flattened_sorter, grenad_params)?; + let flattened_documents = match flattened_sorter { + Some(flattened_sorter) => Some(sorter_into_reader(flattened_sorter, grenad_params)?), + None => None, + }; let original_documents = match original_sorter { Some(original_sorter) => Some(sorter_into_reader(original_sorter, grenad_params)?), None => None, @@ -961,7 +974,7 @@ impl<'a, 'i> Transform<'a, 'i> { settings_diff, documents_count, original_documents: original_documents.map(|od| od.into_inner().into_inner()), - flattened_documents: flattened_documents.into_inner().into_inner(), + flattened_documents: flattened_documents.map(|fd| fd.into_inner().into_inner()), }) } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 0599bb9d8..c7d6ff0fd 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1099,6 +1099,7 @@ impl InnerIndexSettingsDiff { } pub fn reindex_searchable(&self) -> bool { + // TODO no longer useful after Tamo's PR self.old .fields_ids_map .iter() From 8f7c8ca7f0bf01b234b8f9d7d2435166a79f56bd Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 22 May 2024 12:23:43 +0200 Subject: [PATCH 45/56] Remove now unused error variant --- meilisearch-types/src/error.rs | 1 - milli/src/error.rs | 2 -- 2 files changed, 3 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 158dfae92..85a2cd767 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -384,7 +384,6 @@ impl ErrorCode for milli::Error { UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField, UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions, UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType, - UserError::InvalidVectorsType { .. 
} => Code::InvalidVectorsType, UserError::TooManyVectors(_, _) => Code::TooManyVectors, UserError::SortError(_) => Code::InvalidSearchSort, UserError::InvalidMinTypoWordLenSetting(_, _) => { diff --git a/milli/src/error.rs b/milli/src/error.rs index e60252ec1..83754afe4 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -117,8 +117,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco InvalidGeoField(#[from] GeoError), #[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)] InvalidVectorDimensions { expected: usize, found: usize }, - #[error("The `_vectors.{subfield}` field in the document with id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")] - InvalidVectorsType { document_id: Value, value: Value, subfield: String }, #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")] InvalidVectorsMapType { document_id: String, value: Value }, #[error("{0}")] From 16037e21692a06c9ec7bacc2d4983cd00d91360c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 22 May 2024 12:24:51 +0200 Subject: [PATCH 46/56] Don't remove embedders that are not in the config from the document DB --- milli/src/update/index_documents/typed_chunk.rs | 15 +++++++++------ milli/src/vector/parsed_vectors.rs | 12 ++++++++---- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 6615a4bc3..2345551ab 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -1,4 +1,4 @@ -use std::collections::HashMap; +use std::collections::{BTreeSet, HashMap}; use std::convert::TryInto; use std::fs::File; use std::io::{self, BufReader}; @@ -211,6 
+211,8 @@ pub(crate) fn write_typed_chunk_into_index( let mut docids = index.documents_ids(wtxn)?; let mut iter = merger.into_stream_merger_iter()?; + let embedders: BTreeSet<_> = + index.embedding_configs(wtxn)?.into_iter().map(|(k, _v)| k).collect(); let mut vectors_buffer = Vec::new(); while let Some((key, reader)) = iter.next()? { let mut writer: KvWriter<_, FieldId> = KvWriter::memory(); @@ -225,9 +227,8 @@ pub(crate) fn write_typed_chunk_into_index( let del_add_reader = KvReaderDelAdd::new(value); if let Some(addition) = del_add_reader.get(DelAdd::Addition) { - let addition = match vectors_fid { - // for the "_vectors" field, only keep vectors that are marked as userProvided - Some(vectors_fid) if vectors_fid == field_id => 'vectors: { + let addition = if vectors_fid == Some(field_id) { + 'vectors: { vectors_buffer.clear(); let Ok(mut vectors) = crate::vector::parsed_vectors::ParsedVectors::from_bytes( @@ -237,7 +238,7 @@ pub(crate) fn write_typed_chunk_into_index( // if the `_vectors` field cannot be parsed as map of vectors, just write it as-is break 'vectors Some(addition); }; - vectors.retain_user_provided_vectors(); + vectors.retain_user_provided_vectors(&embedders); let crate::vector::parsed_vectors::ParsedVectors(vectors) = vectors; if vectors.is_empty() { // skip writing empty `_vectors` map @@ -248,8 +249,10 @@ pub(crate) fn write_typed_chunk_into_index( .map_err(InternalError::SerdeJson)?; Some(vectors_buffer.as_slice()) } - _ => Some(addition), + } else { + Some(addition) }; + if let Some(addition) = addition { writer.insert(field_id, addition)?; } diff --git a/milli/src/vector/parsed_vectors.rs b/milli/src/vector/parsed_vectors.rs index 4e06177de..da67ccc83 100644 --- a/milli/src/vector/parsed_vectors.rs +++ b/milli/src/vector/parsed_vectors.rs @@ -1,4 +1,4 @@ -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use obkv::KvReader; use serde_json::{from_slice, Value}; @@ -89,10 +89,14 @@ impl ParsedVectors { 
Ok(ParsedVectors(value)) } - pub fn retain_user_provided_vectors(&mut self) { - self.0.retain(|_k, v| match v { + pub fn retain_user_provided_vectors(&mut self, embedders: &BTreeSet) { + self.0.retain(|k, v| match v { Vectors::ImplicitlyUserProvided(_) => true, - Vectors::Explicit(ExplicitVectors { embeddings: _, user_provided }) => *user_provided, + Vectors::Explicit(ExplicitVectors { embeddings: _, user_provided }) => { + *user_provided + // if the embedder is not in the config, then never touch it + || !embedders.contains(k) + } }); } } From 3412e7fbcfd9fdd4238741152b927a06ce0b3df5 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 22 May 2024 12:25:21 +0200 Subject: [PATCH 47/56] "[]" is deserialized as 0 embedding rather than 1 embedding of dim 0 --- milli/src/vector/parsed_vectors.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/milli/src/vector/parsed_vectors.rs b/milli/src/vector/parsed_vectors.rs index da67ccc83..2c61baa9e 100644 --- a/milli/src/vector/parsed_vectors.rs +++ b/milli/src/vector/parsed_vectors.rs @@ -136,19 +136,19 @@ fn to_vector_map( #[serde(transparent)] pub struct VectorOrArrayOfVectors { #[serde(with = "either::serde_untagged_optional")] - inner: Option>>, + inner: Option, Embedding>>, } impl VectorOrArrayOfVectors { pub fn into_array_of_vectors(self) -> Option> { match self.inner? { - either::Either::Left(vector) => Some(vec![vector]), - either::Either::Right(vectors) => Some(vectors), + either::Either::Left(vectors) => Some(vectors), + either::Either::Right(vector) => Some(vec![vector]), } } pub fn from_array_of_vectors(array_of_vec: Vec) -> Self { - Self { inner: Some(either::Either::Right(array_of_vec)) } + Self { inner: Some(either::Either::Left(array_of_vec)) } } } @@ -167,12 +167,7 @@ mod test { serde_json::from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap(); insta::assert_json_snapshot!(null.into_array_of_vectors(), @"null"); - // 👇 is the the intended behavior? 
would rather expect [] here, but changing that is a breaking change... - insta::assert_json_snapshot!(empty.into_array_of_vectors(), @r###" - [ - [] - ] - "###); + insta::assert_json_snapshot!(empty.into_array_of_vectors(), @"[]"); insta::assert_json_snapshot!(one.into_array_of_vectors(), @r###" [ [ From 8a941c0241cb62956662f9725782b5a1db132339 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 22 May 2024 12:26:00 +0200 Subject: [PATCH 48/56] Smaller review changes --- index-scheduler/src/batch.rs | 17 ++++++++++------- milli/src/index.rs | 4 ++-- .../extract/extract_vector_points.rs | 4 +--- milli/src/update/index_documents/extract/mod.rs | 2 +- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index 1f5ec76b9..d10f83a0a 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -31,7 +31,9 @@ use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::update::{ IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings, }; -use meilisearch_types::milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME; +use meilisearch_types::milli::vector::parsed_vectors::{ + ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME, +}; use meilisearch_types::milli::{self, Filter}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; @@ -955,12 +957,13 @@ impl IndexScheduler { for (embedder_name, embeddings) in embeddings { // don't change the entry if it already exists, because it was user-provided vectors.entry(embedder_name).or_insert_with(|| { - - let embeddings = milli::vector::parsed_vectors::ExplicitVectors { - embeddings: milli::vector::parsed_vectors::VectorOrArrayOfVectors::from_array_of_vectors(embeddings), - user_provided: false, - }; - serde_json::to_value(embeddings).unwrap() + let 
embeddings = ExplicitVectors { + embeddings: VectorOrArrayOfVectors::from_array_of_vectors( + embeddings, + ), + user_provided: false, + }; + serde_json::to_value(embeddings).unwrap() }); } } diff --git a/milli/src/index.rs b/milli/src/index.rs index 66cd6f3cc..982be0139 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -23,7 +23,7 @@ use crate::heed_codec::{ }; use crate::order_by_map::OrderByMap; use crate::proximity::ProximityPrecision; -use crate::vector::EmbeddingConfig; +use crate::vector::{Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, @@ -1611,7 +1611,7 @@ impl Index { &self, rtxn: &RoTxn<'_>, docid: DocumentId, - ) -> Result>> { + ) -> Result>> { let mut res = BTreeMap::new(); for row in self.embedder_category_id.iter(rtxn)? { let (embedder_name, embedder_id) = row?; diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 8b78a8c55..724d9ea81 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -89,7 +89,7 @@ struct EmbedderVectorExtractor { pub fn extract_vector_points( obkv_documents: grenad::Reader, indexer: GrenadParameters, - settings_diff: Arc, + settings_diff: &InnerIndexSettingsDiff, ) -> Result> { puffin::profile_function!(); @@ -258,8 +258,6 @@ pub fn extract_vector_points( } } - ///// - let mut results = Vec::new(); for EmbedderVectorExtractor { diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 0ea0fcc5c..7598c8094 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -241,7 +241,7 @@ fn send_original_documents_data( let 
original_documents_chunk = original_documents_chunk.clone(); let lmdb_writer_sx = lmdb_writer_sx.clone(); rayon::spawn(move || { - match extract_vector_points(original_documents_chunk.clone(), indexer, settings_diff) { + match extract_vector_points(original_documents_chunk.clone(), indexer, &settings_diff) { Ok(extracted_vectors) => { for ExtractedVectorPoints { manual_vectors, From bc5663e673ccb4f364fb384b5562fda0c1521416 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 22 May 2024 16:06:15 +0200 Subject: [PATCH 49/56] FieldIdsMap no longer useful thanks to #4631 --- milli/src/update/index_documents/transform.rs | 6 +++--- milli/src/update/settings.rs | 10 ++-------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 8bedd778e..aef4d1583 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -840,9 +840,9 @@ impl<'a, 'i> Transform<'a, 'i> { original_obkv_buffer: &mut Vec, flattened_obkv_buffer: &mut Vec, ) -> Result<()> { - /// TODO do a XOR of the faceted fields - /// TODO if reindex_searchable returns true store all searchables else none - /// TODO no longer useful after Tamo's PR + // TODO do a XOR of the faceted fields + // TODO if reindex_searchable returns true store all searchables else none + // TODO no longer useful after Tamo's PR let mut old_fields_ids_map = settings_diff.old.fields_ids_map.clone(); let mut new_fields_ids_map = settings_diff.new.fields_ids_map.clone(); let mut obkv_writer = KvWriter::<_, FieldId>::memory(); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index c7d6ff0fd..1529e1fe6 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1099,14 +1099,8 @@ impl InnerIndexSettingsDiff { } pub fn reindex_searchable(&self) -> bool { - // TODO no longer useful after Tamo's PR - self.old - 
.fields_ids_map - .iter() - .zip(self.new.fields_ids_map.iter()) - .any(|(old, new)| old != new) - || self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) - != self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) + self.old.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) + != self.new.stop_words.as_ref().map(|set| set.as_fst().as_bytes()) || self.old.allowed_separators != self.new.allowed_separators || self.old.dictionary != self.new.dictionary || self.old.user_defined_searchable_fields != self.new.user_defined_searchable_fields From fe17c0f52e22b30fb6aec06fb233fbd4afbccf8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 22 May 2024 16:05:55 +0200 Subject: [PATCH 50/56] Construct the minimal OBKVs according to the settings diff --- meilisearch/tests/search/mod.rs | 20 +++ milli/src/lib.rs | 3 +- milli/src/update/index_documents/transform.rs | 137 +++++++++++------- milli/src/update/settings.rs | 26 ++-- 4 files changed, 122 insertions(+), 64 deletions(-) diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index f601e2b03..b02c10319 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -680,6 +680,26 @@ async fn search_facet_distribution() { }, ) .await; + + index.update_settings(json!({"filterableAttributes": ["doggos.name"]})).await; + index.wait_task(5).await; + + index + .search( + json!({ + "facets": ["doggos.name"] + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + let dist = response["facetDistribution"].as_object().unwrap(); + assert_eq!(dist.len(), 1); + assert_eq!( + dist["doggos.name"], + json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1}) + ); + }, + ) + .await; } #[actix_rt::test] diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 881633b5c..c74aa10e8 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -354,8 +354,7 @@ pub fn is_faceted(field: &str, faceted_fields: impl IntoIterator bool { - 
field.starts_with(facet) - && field[facet.len()..].chars().next().map(|c| c == '.').unwrap_or(true) + field.starts_with(facet) && field[facet.len()..].chars().next().map_or(true, |c| c == '.') } pub fn normalize_facet(original: &str) -> String { diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index aef4d1583..733e74800 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::collections::btree_map::Entry as BEntry; use std::collections::hash_map::Entry as HEntry; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fs::File; use std::io::{Read, Seek}; @@ -20,13 +20,13 @@ use super::{IndexDocumentsMethod, IndexerConfig}; use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader}; use crate::error::{Error, InternalError, UserError}; use crate::index::{db_name, main_key}; -use crate::update::del_add::{ - del_add_from_two_obkvs, into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd, -}; +use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd}; use crate::update::index_documents::GrenadParameters; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; -use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result}; +use crate::{ + is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, +}; pub struct TransformOutput { pub primary_key: String, @@ -808,11 +808,15 @@ impl<'a, 'i> Transform<'a, 'i> { })?; let old_inner_settings = InnerIndexSettings::from_index(self.index, wtxn)?; + let fields_ids_map = self.fields_ids_map; + let primary_key_id = self.index.primary_key(wtxn)?.and_then(|name| fields_ids_map.id(name)); let mut new_inner_settings = 
old_inner_settings.clone(); - new_inner_settings.fields_ids_map = self.fields_ids_map; + new_inner_settings.fields_ids_map = fields_ids_map; + let settings_diff = InnerIndexSettingsDiff { old: old_inner_settings, new: new_inner_settings, + primary_key_id, embedding_configs_updated: false, settings_update_only: false, }; @@ -837,37 +841,66 @@ impl<'a, 'i> Transform<'a, 'i> { fn rebind_existing_document( old_obkv: KvReader, settings_diff: &InnerIndexSettingsDiff, - original_obkv_buffer: &mut Vec, - flattened_obkv_buffer: &mut Vec, + modified_faceted_fields: &HashSet, + original_obkv_buffer: Option<&mut Vec>, + flattened_obkv_buffer: Option<&mut Vec>, ) -> Result<()> { - // TODO do a XOR of the faceted fields - // TODO if reindex_searchable returns true store all searchables else none - // TODO no longer useful after Tamo's PR - let mut old_fields_ids_map = settings_diff.old.fields_ids_map.clone(); - let mut new_fields_ids_map = settings_diff.new.fields_ids_map.clone(); + // Always keep the primary key. + let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) }; + + // If only the `searchableAttributes` has been changed, keep only the searchable fields. + let must_reindex_searchables = settings_diff.reindex_searchable(); + let necessary_searchable_field = |id: FieldId| -> bool { + must_reindex_searchables + && (settings_diff.old.searchable_fields_ids.contains(&id) + || settings_diff.new.searchable_fields_ids.contains(&id)) + }; + + // If only a faceted field has been added, keep only this field. 
+ let must_reindex_facets = settings_diff.reindex_facets(); + let necessary_faceted_field = |id: FieldId| -> bool { + let field_name = settings_diff.new.fields_ids_map.name(id).unwrap(); + must_reindex_facets + && modified_faceted_fields + .iter() + .any(|long| is_faceted_by(long, field_name) || is_faceted_by(field_name, long)) + }; + + // Always provide all fields when vectors are involved because + // we need the fields for the prompt/templating. + let reindex_vectors = settings_diff.reindex_vectors(); + let mut obkv_writer = KvWriter::<_, FieldId>::memory(); - // We iterate over the new `FieldsIdsMap` ids in order and construct the new obkv. - for (id, name) in new_fields_ids_map.iter() { - if let Some(val) = old_fields_ids_map.id(name).and_then(|id| old_obkv.get(id)) { + for (id, val) in old_obkv.iter() { + if is_primary_key(id) + || necessary_searchable_field(id) + || necessary_faceted_field(id) + || reindex_vectors + { obkv_writer.insert(id, val)?; } } let data = obkv_writer.into_inner()?; - let new_obkv = KvReader::::new(&data); + let obkv = KvReader::::new(&data); - // take the non-flattened version if flatten_from_fields_ids_map returns None. - let old_flattened = Self::flatten_from_fields_ids_map(&old_obkv, &mut old_fields_ids_map)?; - let old_flattened = - old_flattened.as_deref().map_or_else(|| old_obkv, KvReader::::new); - let new_flattened = Self::flatten_from_fields_ids_map(&new_obkv, &mut new_fields_ids_map)?; - let new_flattened = - new_flattened.as_deref().map_or_else(|| new_obkv, KvReader::::new); + if let Some(original_obkv_buffer) = original_obkv_buffer { + original_obkv_buffer.clear(); + into_del_add_obkv(obkv, DelAddOperation::DeletionAndAddition, original_obkv_buffer)?; + } - original_obkv_buffer.clear(); - flattened_obkv_buffer.clear(); + if let Some(flattened_obkv_buffer) = flattened_obkv_buffer { + // take the non-flattened version if flatten_from_fields_ids_map returns None.
+ let mut fields_ids_map = settings_diff.new.fields_ids_map.clone(); + let flattened = Self::flatten_from_fields_ids_map(&obkv, &mut fields_ids_map)?; + let flattened = flattened.as_deref().map_or(obkv, KvReader::new); - del_add_from_two_obkvs(&old_obkv, &new_obkv, original_obkv_buffer)?; - del_add_from_two_obkvs(&old_flattened, &new_flattened, flattened_obkv_buffer)?; + flattened_obkv_buffer.clear(); + into_del_add_obkv( + flattened, + DelAddOperation::DeletionAndAddition, + flattened_obkv_buffer, + )?; + } Ok(()) } @@ -924,30 +957,34 @@ impl<'a, 'i> Transform<'a, 'i> { None }; - let mut original_obkv_buffer = Vec::new(); - let mut flattened_obkv_buffer = Vec::new(); - let mut document_sorter_key_buffer = Vec::new(); - for result in self.index.external_documents_ids().iter(wtxn)? { - let (external_id, docid) = result?; - let old_obkv = self.index.documents.get(wtxn, &docid)?.ok_or( - InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None }, - )?; + if original_sorter.is_some() || flattened_sorter.is_some() { + let modified_faceted_fields = settings_diff.modified_faceted_fields(); + let mut original_obkv_buffer = Vec::new(); + let mut flattened_obkv_buffer = Vec::new(); + let mut document_sorter_key_buffer = Vec::new(); + for result in self.index.external_documents_ids().iter(wtxn)? 
{ + let (external_id, docid) = result?; + let old_obkv = self.index.documents.get(wtxn, &docid)?.ok_or( + InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None }, + )?; - Self::rebind_existing_document( - old_obkv, - &settings_diff, - &mut original_obkv_buffer, - &mut flattened_obkv_buffer, - )?; + Self::rebind_existing_document( + old_obkv, + &settings_diff, + &modified_faceted_fields, + Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()), + Some(&mut flattened_obkv_buffer).filter(|_| flattened_sorter.is_some()), + )?; - document_sorter_key_buffer.clear(); - document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes()); - document_sorter_key_buffer.extend_from_slice(external_id.as_bytes()); - if let Some(original_sorter) = original_sorter.as_mut() { - original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?; - } - if let Some(flattened_sorter) = flattened_sorter.as_mut() { - flattened_sorter.insert(docid.to_be_bytes(), &flattened_obkv_buffer)?; + if let Some(original_sorter) = original_sorter.as_mut() { + document_sorter_key_buffer.clear(); + document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes()); + document_sorter_key_buffer.extend_from_slice(external_id.as_bytes()); + original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?; + } + if let Some(flattened_sorter) = flattened_sorter.as_mut() { + flattened_sorter.insert(docid.to_be_bytes(), &flattened_obkv_buffer)?; + } } } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 1529e1fe6..0fd39ce77 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1067,10 +1067,17 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { // 3. 
Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage let embedding_configs_updated = self.update_embedding_configs()?; - let new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?; + let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn)?; + new_inner_settings.recompute_facets(self.wtxn, self.index)?; + + let primary_key_id = self + .index + .primary_key(self.wtxn)? + .and_then(|name| new_inner_settings.fields_ids_map.id(name)); let inner_settings_diff = InnerIndexSettingsDiff { old: old_inner_settings, new: new_inner_settings, + primary_key_id, embedding_configs_updated, settings_update_only: true, }; @@ -1086,10 +1093,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { pub struct InnerIndexSettingsDiff { pub(crate) old: InnerIndexSettings, pub(crate) new: InnerIndexSettings, - + pub(crate) primary_key_id: Option, // TODO: compare directly the embedders. pub(crate) embedding_configs_updated: bool, - pub(crate) settings_update_only: bool, } @@ -1127,15 +1133,7 @@ impl InnerIndexSettingsDiff { return true; } - let faceted_updated = - (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields); - - self.old - .fields_ids_map - .iter() - .zip(self.new.fields_ids_map.iter()) - .any(|(old, new)| old != new) - || faceted_updated + (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields) } pub fn reindex_vectors(&self) -> bool { @@ -1145,6 +1143,10 @@ impl InnerIndexSettingsDiff { pub fn settings_update_only(&self) -> bool { self.settings_update_only } + + pub fn modified_faceted_fields(&self) -> HashSet { + &self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields + } } #[derive(Clone)] From e3407056347571d3938938ecab1968dda0aeb6a4 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 23 May 2024 15:29:06 +0200 Subject: [PATCH 51/56] Change benchmark outputs - logs to stderr instead of stdout - 
prints links to the dashboard when there is a dashboard --- xtask/src/bench/client.rs | 4 ++++ xtask/src/bench/dashboard.rs | 43 ++++++++++++++++++++++++++--------- xtask/src/bench/mod.rs | 44 ++++++++++++++++++++++++++++++++---- 3 files changed, 76 insertions(+), 15 deletions(-) diff --git a/xtask/src/bench/client.rs b/xtask/src/bench/client.rs index 3e46615cc..1c2b743af 100644 --- a/xtask/src/bench/client.rs +++ b/xtask/src/bench/client.rs @@ -55,6 +55,10 @@ impl Client { pub fn delete(&self, route: &str) -> reqwest::RequestBuilder { self.request(reqwest::Method::DELETE, route) } + + pub fn base_url(&self) -> Option<&str> { + self.base_url.as_deref() + } } #[derive(Debug, Clone, Copy, Deserialize)] diff --git a/xtask/src/bench/dashboard.rs b/xtask/src/bench/dashboard.rs index 3ba0ca58b..67353f7bb 100644 --- a/xtask/src/bench/dashboard.rs +++ b/xtask/src/bench/dashboard.rs @@ -18,12 +18,9 @@ pub enum DashboardClient { } impl DashboardClient { - pub fn new(dashboard_url: &str, api_key: Option<&str>) -> anyhow::Result { - let dashboard_client = Client::new( - Some(format!("{}/api/v1", dashboard_url)), - api_key, - Some(std::time::Duration::from_secs(60)), - )?; + pub fn new(dashboard_url: String, api_key: Option<&str>) -> anyhow::Result { + let dashboard_client = + Client::new(Some(dashboard_url), api_key, Some(std::time::Duration::from_secs(60)))?; Ok(Self::Client(dashboard_client)) } @@ -36,7 +33,7 @@ impl DashboardClient { let Self::Client(dashboard_client) = self else { return Ok(()) }; let response = dashboard_client - .put("machine") + .put("/api/v1/machine") .json(&json!({"hostname": env.hostname})) .send() .await @@ -62,7 +59,7 @@ impl DashboardClient { let Self::Client(dashboard_client) = self else { return Ok(Uuid::now_v7()) }; let response = dashboard_client - .put("invocation") + .put("/api/v1/invocation") .json(&json!({ "commit": { "sha1": build_info.commit_sha1, @@ -97,7 +94,7 @@ impl DashboardClient { let Self::Client(dashboard_client) = self else { 
return Ok(Uuid::now_v7()) }; let response = dashboard_client - .put("workload") + .put("/api/v1/workload") .json(&json!({ "invocation_uuid": invocation_uuid, "name": &workload.name, @@ -124,7 +121,7 @@ impl DashboardClient { let Self::Client(dashboard_client) = self else { return Ok(()) }; let response = dashboard_client - .put("run") + .put("/api/v1/run") .json(&json!({ "workload_uuid": workload_uuid, "data": report @@ -159,7 +156,7 @@ impl DashboardClient { pub async fn mark_as_failed(&self, invocation_uuid: Uuid, failure_reason: Option) { if let DashboardClient::Client(client) = self { let response = client - .post("cancel-invocation") + .post("/api/v1/cancel-invocation") .json(&json!({ "invocation_uuid": invocation_uuid, "failure_reason": failure_reason, @@ -186,4 +183,28 @@ impl DashboardClient { tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled"); } + + /// Result URL in markdown + pub(crate) fn result_url( + &self, + workload_name: &str, + build_info: &build_info::BuildInfo, + baseline_branch: &str, + ) -> String { + let Self::Client(client) = self else { return Default::default() }; + let Some(base_url) = client.base_url() else { return Default::default() }; + + let Some(commit_sha1) = build_info.commit_sha1 else { return Default::default() }; + + // https://bench.meilisearch.dev/view_spans?commit_sha1=500ddc76b549fb9f1af54b2dd6abfa15960381bb&workload_name=settings-add-remove-filters.json&target_branch=reduce-transform-disk-usage&baseline_branch=main + let mut url = format!( + "{base_url}/view_spans?commit_sha1={commit_sha1}&workload_name={workload_name}" + ); + + if let Some(target_branch) = build_info.branch { + url += &format!("&target_branch={target_branch}&baseline_branch={baseline_branch}"); + } + + format!("[{workload_name} compared with {baseline_branch}]({url})") + } } diff --git a/xtask/src/bench/mod.rs b/xtask/src/bench/mod.rs index 844b64f63..fdb2c4963 100644 --- a/xtask/src/bench/mod.rs +++ b/xtask/src/bench/mod.rs @@ 
-6,6 +6,7 @@ mod env_info; mod meili_process; mod workload; +use std::io::LineWriter; use std::path::PathBuf; use anyhow::Context; @@ -90,6 +91,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { let subscriber = tracing_subscriber::registry().with( tracing_subscriber::fmt::layer() + .with_writer(|| LineWriter::new(std::io::stderr())) .with_span_events(FmtSpan::NEW | FmtSpan::CLOSE) .with_filter(filter), ); @@ -110,7 +112,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { let dashboard_client = if args.no_dashboard { dashboard::DashboardClient::new_dry() } else { - dashboard::DashboardClient::new(&args.dashboard_url, args.api_key.as_deref())? + dashboard::DashboardClient::new(args.dashboard_url.clone(), args.api_key.as_deref())? }; // reporting uses its own client because keeping the stream open to wait for entries @@ -136,7 +138,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap(); let max_workloads = args.workload_file.len(); let reason: Option<&str> = args.reason.as_deref(); - let invocation_uuid = dashboard_client.create_invocation( build_info, commit_message, env, max_workloads, reason).await?; + let invocation_uuid = dashboard_client.create_invocation(build_info.clone(), commit_message, env, max_workloads, reason).await?; tracing::info!(workload_count = args.workload_file.len(), "handling workload files"); @@ -144,6 +146,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { let workload_runs = tokio::spawn( { let dashboard_client = dashboard_client.clone(); + let mut dashboard_urls = Vec::new(); async move { for workload_file in args.workload_file.iter() { let workload: Workload = serde_json::from_reader( @@ -152,6 +155,8 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { ) .with_context(|| format!("error parsing {} as JSON", workload_file.display()))?; + let workload_name = workload.name.clone(); + 
workload::execute( &assets_client, &dashboard_client, @@ -163,8 +168,23 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { &args, ) .await?; + + let result_url = dashboard_client.result_url(&workload_name, &build_info, "main"); + + if !result_url.is_empty() { + dashboard_urls.push(result_url); + } + + if let Some(branch) = build_info.branch { + let result_url = dashboard_client.result_url(&workload_name, &build_info, branch); + + + if !result_url.is_empty() { + dashboard_urls.push(result_url); + } + } } - Ok::<(), anyhow::Error>(()) + Ok::<_, anyhow::Error>(dashboard_urls) }}); // handle ctrl-c @@ -176,13 +196,19 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { // wait for the end of the main task, handle result match workload_runs.await { - Ok(Ok(_)) => { + Ok(Ok(urls)) => { tracing::info!("Success"); + println!("☀️ Benchmark invocation completed, please find the results for your workloads below:"); + for url in urls { + println!("- {url}"); + } Ok::<(), anyhow::Error>(()) } Ok(Err(error)) => { tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard"); dashboard_client.mark_as_failed(invocation_uuid, Some(error.to_string())).await; + println!("☔️ Benchmark invocation failed..."); + println!("{error}"); tracing::warn!(%invocation_uuid, "invocation marked as failed following error"); Err(error) }, @@ -191,10 +217,20 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { Ok(panic) => { tracing::error!("invocation panicked, attempting to report the failure to dashboard"); dashboard_client.mark_as_failed( invocation_uuid, Some("Panicked".into())).await; + println!("‼️ Benchmark invocation panicked 😱"); + let msg = match panic.downcast_ref::<&'static str>() { + Some(s) => *s, + None => match panic.downcast_ref::() { + Some(s) => &s[..], + None => "Box", + }, + }; + println!("panicked at {msg}"); std::panic::resume_unwind(panic) } Err(_) => { tracing::warn!("task was canceled"); + 
println!("🚫 Benchmark invocation was canceled"); Ok(()) } } From eaf57056cabeab6a776b2aeb7c093b13bea6bdcb Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 23 May 2024 15:33:07 +0200 Subject: [PATCH 52/56] comment with the results of benchmarks --- .github/workflows/bench-pr.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index 418a23717..36af79460 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -43,4 +43,11 @@ jobs: - name: Run benchmarks on PR ${{ github.event.issue.id }} run: | - cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" -- ${{ steps.command.outputs.command-arguments }} \ No newline at end of file + cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" \ + --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" \ + --reason "[Comment](${{ github.event.comment.html_url }}) on [#${{ github.event.issue.number }}](${{ github.event.issue.html_url }})" \ + -- ${{ steps.command.outputs.command-arguments }} > benchlinks.txt + + - name: Send comment in PR + run: | + gh pr comment ${{github.event.issue.number}} --body-file benchlinks.txt From 7f3e51349e2631fed5e67254a4dd35324f89d0db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 27 May 2024 15:53:06 +0200 Subject: [PATCH 53/56] Remove puffin for the dependencies --- Cargo.lock | 26 -------------------------- index-scheduler/Cargo.toml | 1 - meilisearch/Cargo.toml | 1 - milli/Cargo.toml | 3 --- 4 files changed, 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 156917462..008f18a16 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2464,7 +2464,6 @@ dependencies = [ "meilisearch-auth", "meilisearch-types", "page_size 0.5.0", - "puffin", "rayon", 
"roaring", "serde", @@ -3231,12 +3230,6 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" -[[package]] -name = "lz4_flex" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b8c72594ac26bfd34f2d99dfced2edfaddfe8a476e3ff2ca0eb293d925c4f83" - [[package]] name = "macro_rules_attribute" version = "0.2.0" @@ -3341,7 +3334,6 @@ dependencies = [ "pin-project-lite", "platform-dirs", "prometheus", - "puffin", "rand", "rayon", "regex", @@ -3509,7 +3501,6 @@ dependencies = [ "obkv", "once_cell", "ordered-float", - "puffin", "rand", "rand_pcg", "rayon", @@ -4180,23 +4171,6 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" -[[package]] -name = "puffin" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76425abd4e1a0ad4bd6995dd974b52f414fca9974171df8e3708b3e660d05a21" -dependencies = [ - "anyhow", - "bincode", - "byteorder", - "cfg-if", - "instant", - "lz4_flex", - "once_cell", - "parking_lot", - "serde", -] - [[package]] name = "pulp" version = "0.18.9" diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml index 4b6c0a36d..21fa34733 100644 --- a/index-scheduler/Cargo.toml +++ b/index-scheduler/Cargo.toml @@ -22,7 +22,6 @@ flate2 = "1.0.28" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } page_size = "0.5.0" -puffin = { version = "0.16.0", features = ["serialization"] } rayon = "1.8.1" roaring = { version = "0.10.2", features = ["serde"] } serde = { version = "1.0.195", features = ["derive"] } diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index ed62c5f48..75962c450 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -67,7 +67,6 @@ permissive-json-pointer = { 
path = "../permissive-json-pointer" } pin-project-lite = "0.2.13" platform-dirs = "0.3.0" prometheus = { version = "0.13.3", features = ["process"] } -puffin = { version = "0.16.0", features = ["serialization"] } rand = "0.8.5" rayon = "1.8.0" regex = "1.10.2" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index c5dddd0fd..4a08e6261 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -67,9 +67,6 @@ filter-parser = { path = "../filter-parser" } # documents words self-join itertools = "0.11.0" -# profiling -puffin = "0.16.0" - csv = "1.3.0" candle-core = { version = "0.4.1" } candle-transformers = { version = "0.4.1" } From dc949ab46a7bcd60b250f4131c3fd0e4dfa41800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 27 May 2024 15:59:14 +0200 Subject: [PATCH 54/56] Remove puffin usage --- index-scheduler/src/batch.rs | 6 -- index-scheduler/src/lib.rs | 36 ----------- milli/src/update/clear_documents.rs | 2 - milli/src/update/index_documents/enrich.rs | 2 - .../extract/extract_docid_word_positions.rs | 2 - .../extract/extract_facet_number_docids.rs | 2 - .../extract/extract_facet_string_docids.rs | 2 - .../extract/extract_fid_docid_facet_values.rs | 2 - .../extract/extract_fid_word_count_docids.rs | 2 - .../extract/extract_geo_points.rs | 2 - .../extract/extract_vector_points.rs | 4 -- .../extract/extract_word_docids.rs | 4 -- .../extract_word_pair_proximity_docids.rs | 5 -- .../extract/extract_word_position_docids.rs | 4 -- .../src/update/index_documents/extract/mod.rs | 11 +--- .../index_documents/helpers/grenad_helpers.rs | 3 - milli/src/update/index_documents/mod.rs | 17 +---- milli/src/update/index_documents/transform.rs | 8 --- .../src/update/index_documents/typed_chunk.rs | 62 ------------------- milli/src/update/settings.rs | 2 - milli/src/update/word_prefix_docids.rs | 2 - .../src/update/words_prefix_integer_docids.rs | 1 - milli/src/update/words_prefixes_fst.rs | 2 - 23 files changed, 2 insertions(+), 181 deletions(-) diff --git 
a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs index d10f83a0a..181ac49a3 100644 --- a/index-scheduler/src/batch.rs +++ b/index-scheduler/src/batch.rs @@ -529,8 +529,6 @@ impl IndexScheduler { #[cfg(test)] self.maybe_fail(crate::tests::FailureLocation::InsideCreateBatch)?; - puffin::profile_function!(); - let enqueued = &self.get_status(rtxn, Status::Enqueued)?; let to_cancel = self.get_kind(rtxn, Kind::TaskCancelation)? & enqueued; @@ -639,8 +637,6 @@ impl IndexScheduler { self.breakpoint(crate::Breakpoint::InsideProcessBatch); } - puffin::profile_function!(batch.to_string()); - match batch { Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => { // 1. Retrieve the tasks that matched the query at enqueue-time. @@ -1226,8 +1222,6 @@ impl IndexScheduler { index: &'i Index, operation: IndexOperation, ) -> Result> { - puffin::profile_function!(); - match operation { IndexOperation::DocumentClear { mut tasks, .. } => { let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?; diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index e4c9cd08f..8a1c2f540 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -33,7 +33,6 @@ pub type Result = std::result::Result; pub type TaskId = u32; use std::collections::{BTreeMap, HashMap}; -use std::fs::File; use std::io::{self, BufReader, Read}; use std::ops::{Bound, RangeBounds}; use std::path::{Path, PathBuf}; @@ -59,7 +58,6 @@ use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfi use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; use meilisearch_types::task_view::TaskView; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; -use puffin::FrameView; use rayon::current_num_threads; use rayon::prelude::{IntoParallelIterator, ParallelIterator}; use roaring::RoaringBitmap; @@ -344,9 +342,6 @@ pub struct IndexScheduler { /// The Authorization 
header to send to the webhook URL. pub(crate) webhook_authorization_header: Option, - /// A frame to output the indexation profiling files to disk. - pub(crate) puffin_frame: Arc, - /// The path used to create the dumps. pub(crate) dumps_path: PathBuf, @@ -401,7 +396,6 @@ impl IndexScheduler { cleanup_enabled: self.cleanup_enabled, max_number_of_tasks: self.max_number_of_tasks, max_number_of_batched_tasks: self.max_number_of_batched_tasks, - puffin_frame: self.puffin_frame.clone(), snapshots_path: self.snapshots_path.clone(), dumps_path: self.dumps_path.clone(), auth_path: self.auth_path.clone(), @@ -500,7 +494,6 @@ impl IndexScheduler { env, // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things wake_up: Arc::new(SignalEvent::auto(true)), - puffin_frame: Arc::new(puffin::GlobalFrameView::default()), autobatching_enabled: options.autobatching_enabled, cleanup_enabled: options.cleanup_enabled, max_number_of_tasks: options.max_number_of_tasks, @@ -621,10 +614,6 @@ impl IndexScheduler { run.wake_up.wait(); loop { - let puffin_enabled = run.features().check_puffin().is_ok(); - puffin::set_scopes_on(puffin_enabled); - puffin::GlobalProfiler::lock().new_frame(); - match run.tick() { Ok(TickOutcome::TickAgain(_)) => (), Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), @@ -636,31 +625,6 @@ impl IndexScheduler { } } } - - // Let's write the previous frame to disk but only if - // the user wanted to profile with puffin. - if puffin_enabled { - let mut frame_view = run.puffin_frame.lock(); - if !frame_view.is_empty() { - let now = OffsetDateTime::now_utc(); - let mut file = match File::create(format!("{}.puffin", now)) { - Ok(file) => file, - Err(e) => { - tracing::error!("{e}"); - continue; - } - }; - if let Err(e) = frame_view.save_to_writer(&mut file) { - tracing::error!("{e}"); - } - if let Err(e) = file.sync_all() { - tracing::error!("{e}"); - } - // We erase this frame view as it is no more useful. 
We want to - // measure the new frames now that we exported the previous ones. - *frame_view = FrameView::default(); - } - } } }) .unwrap(); diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs index 6715939dc..3490b55e4 100644 --- a/milli/src/update/clear_documents.rs +++ b/milli/src/update/clear_documents.rs @@ -21,8 +21,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> { name = "clear_documents" )] pub fn execute(self) -> Result { - puffin::profile_function!(); - self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; let Index { env: _env, diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs index 162136912..2da717bb0 100644 --- a/milli/src/update/index_documents/enrich.rs +++ b/milli/src/update/index_documents/enrich.rs @@ -29,8 +29,6 @@ pub fn enrich_documents_batch( autogenerate_docids: bool, reader: DocumentsBatchReader, ) -> Result, UserError>> { - puffin::profile_function!(); - let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index(); let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?; diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index d97b6639e..9c557de81 100644 --- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -29,8 +29,6 @@ pub fn extract_docid_word_positions( settings_diff: &InnerIndexSettingsDiff, max_positions_per_attributes: Option, ) -> Result<(grenad::Reader>, ScriptLanguageDocidsMap)> { - puffin::profile_function!(); - let max_positions_per_attributes = max_positions_per_attributes .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE)); let max_memory = indexer.max_memory_by_thread(); diff --git 
a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs index 1848a085f..bfd769604 100644 --- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs @@ -23,8 +23,6 @@ pub fn extract_facet_number_docids( indexer: GrenadParameters, _settings_diff: &InnerIndexSettingsDiff, ) -> Result>> { - puffin::profile_function!(); - let max_memory = indexer.max_memory_by_thread(); let mut facet_number_docids_sorter = create_sorter( diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index abffe17ab..3deace127 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -28,8 +28,6 @@ pub fn extract_facet_string_docids( indexer: GrenadParameters, _settings_diff: &InnerIndexSettingsDiff, ) -> Result<(grenad::Reader>, grenad::Reader>)> { - puffin::profile_function!(); - let max_memory = indexer.max_memory_by_thread(); let options = NormalizerOption { lossy: true, ..Default::default() }; diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 123c3b123..a2b060255 100644 --- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -47,8 +47,6 @@ pub fn extract_fid_docid_facet_values( settings_diff: &InnerIndexSettingsDiff, geo_fields_ids: Option<(FieldId, FieldId)>, ) -> Result { - puffin::profile_function!(); - let max_memory = indexer.max_memory_by_thread(); let mut fid_docid_facet_numbers_sorter = create_sorter( diff --git 
a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs index 51e0642da..f252df1cd 100644 --- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs +++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs @@ -26,8 +26,6 @@ pub fn extract_fid_word_count_docids( indexer: GrenadParameters, _settings_diff: &InnerIndexSettingsDiff, ) -> Result>> { - puffin::profile_function!(); - let max_memory = indexer.max_memory_by_thread(); let mut fid_word_count_docids_sorter = create_sorter( diff --git a/milli/src/update/index_documents/extract/extract_geo_points.rs b/milli/src/update/index_documents/extract/extract_geo_points.rs index cfcc021c6..3d7463fba 100644 --- a/milli/src/update/index_documents/extract/extract_geo_points.rs +++ b/milli/src/update/index_documents/extract/extract_geo_points.rs @@ -20,8 +20,6 @@ pub fn extract_geo_points( primary_key_id: FieldId, (lat_fid, lng_fid): (FieldId, FieldId), ) -> Result>> { - puffin::profile_function!(); - let mut writer = create_writer( indexer.chunk_compression_type, indexer.chunk_compression_level, diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index 724d9ea81..76ec90d65 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -91,8 +91,6 @@ pub fn extract_vector_points( indexer: GrenadParameters, settings_diff: &InnerIndexSettingsDiff, ) -> Result> { - puffin::profile_function!(); - let reindex_vectors = settings_diff.reindex_vectors(); let old_fields_ids_map = &settings_diff.old.fields_ids_map; @@ -295,7 +293,6 @@ fn push_vectors_diff( delta: VectorStateDelta, reindex_vectors: bool, ) -> Result<()> { - puffin::profile_function!(); let (must_remove, prompt, (mut del_vectors, mut add_vectors)) = 
delta.into_values(); if must_remove // TODO: the below condition works because we erase the vec database when a embedding setting changes. @@ -367,7 +364,6 @@ pub fn extract_embeddings( embedder: Arc, request_threads: &ThreadPoolNoAbort, ) -> Result>> { - puffin::profile_function!(); let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism let n_vectors_per_chunk = embedder.prompt_count_in_chunk_hint(); // number of vectors in a single chunk diff --git a/milli/src/update/index_documents/extract/extract_word_docids.rs b/milli/src/update/index_documents/extract/extract_word_docids.rs index 5699f2fb6..457d2359e 100644 --- a/milli/src/update/index_documents/extract/extract_word_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_docids.rs @@ -36,8 +36,6 @@ pub fn extract_word_docids( grenad::Reader>, grenad::Reader>, )> { - puffin::profile_function!(); - let max_memory = indexer.max_memory_by_thread(); let mut word_fid_docids_sorter = create_sorter( @@ -167,8 +165,6 @@ fn words_into_sorter( add_words: &BTreeSet>, word_fid_docids_sorter: &mut grenad::Sorter, ) -> Result<()> { - puffin::profile_function!(); - use itertools::merge_join_by; use itertools::EitherOrBoth::{Both, Left, Right}; diff --git a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs index 23f70ccd2..617338f9f 100644 --- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs @@ -26,7 +26,6 @@ pub fn extract_word_pair_proximity_docids( indexer: GrenadParameters, settings_diff: &InnerIndexSettingsDiff, ) -> Result>> { - puffin::profile_function!(); let any_deletion = settings_diff.old.proximity_precision == ProximityPrecision::ByWord; let any_addition = settings_diff.new.proximity_precision == ProximityPrecision::ByWord; @@ -71,8 +70,6 @@ pub fn 
extract_word_pair_proximity_docids( // if we change document, we fill the sorter if current_document_id.map_or(false, |id| id != document_id) { - puffin::profile_scope!("Document into sorter"); - // FIXME: span inside of a hot loop might degrade performance and create big reports let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter"); let _entered = span.enter(); @@ -163,7 +160,6 @@ pub fn extract_word_pair_proximity_docids( } if let Some(document_id) = current_document_id { - puffin::profile_scope!("Final document into sorter"); // FIXME: span inside of a hot loop might degrade performance and create big reports let span = tracing::trace_span!(target: "indexing::details", "final_document_into_sorter"); let _entered = span.enter(); @@ -176,7 +172,6 @@ pub fn extract_word_pair_proximity_docids( )?; } { - puffin::profile_scope!("sorter_into_reader"); // FIXME: span inside of a hot loop might degrade performance and create big reports let span = tracing::trace_span!(target: "indexing::details", "sorter_into_reader"); let _entered = span.enter(); diff --git a/milli/src/update/index_documents/extract/extract_word_position_docids.rs b/milli/src/update/index_documents/extract/extract_word_position_docids.rs index 45a05b0d0..50b1617f9 100644 --- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs +++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs @@ -25,8 +25,6 @@ pub fn extract_word_position_docids( indexer: GrenadParameters, _settings_diff: &InnerIndexSettingsDiff, ) -> Result>> { - puffin::profile_function!(); - let max_memory = indexer.max_memory_by_thread(); let mut word_position_docids_sorter = create_sorter( @@ -104,8 +102,6 @@ fn words_position_into_sorter( add_word_positions: &BTreeSet<(u16, Vec)>, word_position_docids_sorter: &mut grenad::Sorter, ) -> Result<()> { - puffin::profile_function!(); - use itertools::merge_join_by; use itertools::EitherOrBoth::{Both, Left, Right}; diff 
--git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 7598c8094..90723bc4a 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -47,8 +47,6 @@ pub(crate) fn data_from_obkv_documents( settings_diff: Arc, max_positions_per_attributes: Option, ) -> Result<()> { - puffin::profile_function!(); - let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join( || { original_obkv_chunks @@ -90,7 +88,6 @@ pub(crate) fn data_from_obkv_documents( lmdb_writer_sx.clone(), extract_fid_word_count_docids, TypedChunk::FieldIdWordCountDocids, - "field-id-wordcount-docids", ); run_extraction_task::< _, @@ -117,7 +114,6 @@ pub(crate) fn data_from_obkv_documents( word_fid_docids_reader, } }, - "word-docids", ); run_extraction_task::<_, _, grenad::Reader>>( @@ -127,7 +123,6 @@ pub(crate) fn data_from_obkv_documents( lmdb_writer_sx.clone(), extract_word_position_docids, TypedChunk::WordPositionDocids, - "word-position-docids", ); run_extraction_task::< @@ -141,7 +136,6 @@ pub(crate) fn data_from_obkv_documents( lmdb_writer_sx.clone(), extract_facet_string_docids, TypedChunk::FieldIdFacetStringDocids, - "field-id-facet-string-docids", ); run_extraction_task::<_, _, grenad::Reader>>( @@ -151,7 +145,6 @@ pub(crate) fn data_from_obkv_documents( lmdb_writer_sx.clone(), extract_facet_number_docids, TypedChunk::FieldIdFacetNumberDocids, - "field-id-facet-number-docids", ); run_extraction_task::<_, _, grenad::Reader>>( @@ -161,7 +154,6 @@ pub(crate) fn data_from_obkv_documents( lmdb_writer_sx.clone(), extract_word_pair_proximity_docids, TypedChunk::WordPairProximityDocids, - "word-pair-proximity-docids", ); } @@ -185,7 +177,6 @@ fn run_extraction_task( lmdb_writer_sx: Sender>, extract_fn: FE, serialize_fn: FS, - name: &'static str, ) where FE: Fn( grenad::Reader, @@ -203,7 +194,7 @@ fn run_extraction_task( rayon::spawn(move || { let 
child_span = tracing::trace_span!(target: "indexing::extract::details", parent: ¤t_span, "extract_multiple_chunks"); let _entered = child_span.enter(); - puffin::profile_scope!("extract_multiple_chunks", name); + match extract_fn(chunk, indexer, &settings_diff) { Ok(chunk) => { let _ = lmdb_writer_sx.send(Ok(serialize_fn(chunk))); diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs index b0e3654a9..aa574024d 100644 --- a/milli/src/update/index_documents/helpers/grenad_helpers.rs +++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs @@ -61,7 +61,6 @@ pub fn sorter_into_reader( sorter: grenad::Sorter, indexer: GrenadParameters, ) -> Result>> { - puffin::profile_function!(); let mut writer = create_writer( indexer.chunk_compression_type, indexer.chunk_compression_level, @@ -182,8 +181,6 @@ where FS: for<'a> Fn(&'a [u8], &'a mut Vec) -> Result<&'a [u8]>, FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec) -> Result>, { - puffin::profile_function!(); - let mut buffer = Vec::new(); let database = database.remap_types::(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index dccfbe795..f281becd6 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -141,8 +141,6 @@ where mut self, reader: DocumentsBatchReader, ) -> Result<(Self, StdResult)> { - puffin::profile_function!(); - // Early return when there is no document to add if reader.is_empty() { return Ok((self, Ok(0))); @@ -187,8 +185,6 @@ where mut self, to_delete: Vec, ) -> Result<(Self, StdResult)> { - puffin::profile_function!(); - // Early return when there is no document to add if to_delete.is_empty() { // Maintains Invariant: remove documents actually always returns Ok for the inner result @@ -223,8 +219,6 @@ where mut self, to_delete: &RoaringBitmap, ) -> Result<(Self, u64)> { - puffin::profile_function!(); - // Early return when there is 
no document to add if to_delete.is_empty() { return Ok((self, 0)); @@ -249,8 +243,6 @@ where name = "index_documents" )] pub fn execute(mut self) -> Result { - puffin::profile_function!(); - if self.added_documents == 0 && self.deleted_documents == 0 { let number_of_documents = self.index.number_of_documents(self.wtxn)?; return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents }); @@ -279,8 +271,6 @@ where FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { - puffin::profile_function!(); - let TransformOutput { primary_key, mut settings_diff, @@ -404,7 +394,7 @@ where rayon::spawn(move || { let child_span = tracing::trace_span!(target: "indexing::details", parent: ¤t_span, "extract_and_send_grenad_chunks"); let _enter = child_span.enter(); - puffin::profile_scope!("extract_and_send_grenad_chunks"); + // split obkv file into several chunks let original_chunk_iter = match original_documents { Some(original_documents) => { @@ -612,8 +602,6 @@ where FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { - puffin::profile_function!(); - // Merged databases are already been indexed, we start from this count; let mut databases_seen = MERGED_DATABASE_COUNT; @@ -657,7 +645,6 @@ where { let span = tracing::trace_span!(target: "indexing::details", "compute_prefix_diffs"); let _entered = span.enter(); - puffin::profile_scope!("compute_prefix_diffs"); current_prefix_fst = self.index.words_prefixes_fst(self.wtxn)?; @@ -797,8 +784,6 @@ fn execute_word_prefix_docids( common_prefix_fst_words: &[&[String]], del_prefix_fst_words: &HashSet>, ) -> Result<()> { - puffin::profile_function!(); - let mut builder = WordPrefixDocids::new(txn, word_docids_db, word_prefix_docids_db); builder.chunk_compression_type = indexer_config.chunk_compression_type; builder.chunk_compression_level = indexer_config.chunk_compression_level; diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 
733e74800..41a0a55cf 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -161,8 +161,6 @@ impl<'a, 'i> Transform<'a, 'i> { FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { - puffin::profile_function!(); - let (mut cursor, fields_index) = reader.into_cursor_and_fields_index(); let external_documents_ids = self.index.external_documents_ids(); let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?; @@ -375,8 +373,6 @@ impl<'a, 'i> Transform<'a, 'i> { where FA: Fn() -> bool + Sync, { - puffin::profile_function!(); - // there may be duplicates in the documents to remove. to_remove.sort_unstable(); to_remove.dedup(); @@ -466,8 +462,6 @@ impl<'a, 'i> Transform<'a, 'i> { where FA: Fn() -> bool + Sync, { - puffin::profile_function!(); - let mut documents_deleted = 0; let mut document_sorter_value_buffer = Vec::new(); let mut document_sorter_key_buffer = Vec::new(); @@ -686,8 +680,6 @@ impl<'a, 'i> Transform<'a, 'i> { where F: Fn(UpdateIndexingStep) + Sync, { - puffin::profile_function!(); - let primary_key = self .index .primary_key(wtxn)? 
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 2345551ab..27f760c2a 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -118,65 +118,6 @@ impl TypedChunk { } } -impl TypedChunk { - pub fn to_debug_string(&self) -> String { - match self { - TypedChunk::FieldIdDocidFacetStrings(grenad) => { - format!("FieldIdDocidFacetStrings {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::FieldIdDocidFacetNumbers(grenad) => { - format!("FieldIdDocidFacetNumbers {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::Documents(grenad) => { - format!("Documents {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::FieldIdWordCountDocids(grenad) => { - format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::WordDocids { - word_docids_reader, - exact_word_docids_reader, - word_fid_docids_reader, - } => format!( - "WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {}, word_fid_docids_reader: {} }}", - word_docids_reader.len(), - exact_word_docids_reader.len(), - word_fid_docids_reader.len() - ), - TypedChunk::WordPositionDocids(grenad) => { - format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::WordPairProximityDocids(grenad) => { - format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::FieldIdFacetStringDocids((grenad, _)) => { - format!("FieldIdFacetStringDocids {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::FieldIdFacetNumberDocids(grenad) => { - format!("FieldIdFacetNumberDocids {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::FieldIdFacetExistsDocids(grenad) => { - format!("FieldIdFacetExistsDocids {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::FieldIdFacetIsNullDocids(grenad) => { - format!("FieldIdFacetIsNullDocids {{ number_of_entries: {} }}", 
grenad.len()) - } - TypedChunk::FieldIdFacetIsEmptyDocids(grenad) => { - format!("FieldIdFacetIsEmptyDocids {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::GeoPoints(grenad) => { - format!("GeoPoints {{ number_of_entries: {} }}", grenad.len()) - } - TypedChunk::VectorPoints{ remove_vectors, manual_vectors, embeddings, expected_dimension, embedder_name } => { - format!("VectorPoints {{ remove_vectors: {}, manual_vectors: {}, embeddings: {}, dimension: {}, embedder_name: {} }}", remove_vectors.len(), manual_vectors.len(), embeddings.as_ref().map(|e| e.len()).unwrap_or_default(), expected_dimension, embedder_name) - } - TypedChunk::ScriptLanguageDocids(sl_map) => { - format!("ScriptLanguageDocids {{ number_of_entries: {} }}", sl_map.len()) - } - } - } -} - /// Write typed chunk in the corresponding LMDB database of the provided index. /// Return new documents seen. #[tracing::instrument(level = "trace", skip_all, target = "indexing::write_db")] @@ -185,8 +126,6 @@ pub(crate) fn write_typed_chunk_into_index( index: &Index, wtxn: &mut RwTxn, ) -> Result<(RoaringBitmap, bool)> { - puffin::profile_function!(typed_chunks[0].to_debug_string()); - let mut is_merged_database = false; match typed_chunks[0] { TypedChunk::Documents(_) => { @@ -877,7 +816,6 @@ where FS: for<'a> Fn(&'a [u8], &'a mut Vec) -> Result<&'a [u8]>, FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec) -> Result>, { - puffin::profile_function!(); let mut buffer = Vec::new(); let database = database.remap_types::(); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 0fd39ce77..133f0e3a8 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -398,8 +398,6 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { FP: Fn(UpdateIndexingStep) + Sync, FA: Fn() -> bool + Sync, { - puffin::profile_function!(); - // if the settings are set before any document update, we don't need to do anything, and // will set the primary key during the first document addition. 
if self.index.number_of_documents(self.wtxn)? == 0 { diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 1db066058..925635f80 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -52,8 +52,6 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> { common_prefix_fst_words: &[&[String]], del_prefix_fst_words: &HashSet>, ) -> Result<()> { - puffin::profile_function!(); - // It is forbidden to keep a mutable reference into the database // and write into it at the same time, therefore we write into another file. let mut prefix_docids_sorter = create_sorter( diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs index 272d465fd..9b6aa21ae 100644 --- a/milli/src/update/words_prefix_integer_docids.rs +++ b/milli/src/update/words_prefix_integer_docids.rs @@ -57,7 +57,6 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> { common_prefix_fst_words: &[&[String]], del_prefix_fst_words: &HashSet>, ) -> Result<()> { - puffin::profile_function!(); debug!("Computing and writing the word levels integers docids into LMDB on disk..."); let mut prefix_integer_docids_sorter = create_sorter( diff --git a/milli/src/update/words_prefixes_fst.rs b/milli/src/update/words_prefixes_fst.rs index 8b438cef3..d47d6d14c 100644 --- a/milli/src/update/words_prefixes_fst.rs +++ b/milli/src/update/words_prefixes_fst.rs @@ -45,8 +45,6 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> { name = "words_prefix_fst" )] pub fn execute(self) -> Result<()> { - puffin::profile_function!(); - let words_fst = self.index.words_fst(self.wtxn)?; let mut current_prefix = vec![SmallString32::new(); self.max_prefix_length]; From b6d450d4842e863792f4324090fa16edeb652c4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 27 May 2024 15:59:28 +0200 Subject: [PATCH 55/56] Remove puffin experimental feature --- index-scheduler/src/features.rs | 13 ------------- 
meilisearch-types/src/features.rs | 1 - meilisearch/src/routes/features.rs | 15 ++------------- 3 files changed, 2 insertions(+), 27 deletions(-) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index 3be18a3f1..ae8e6728a 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -68,19 +68,6 @@ impl RoFeatures { .into()) } } - - pub fn check_puffin(&self) -> Result<()> { - if self.runtime.export_puffin_reports { - Ok(()) - } else { - Err(FeatureNotEnabledError { - disabled_action: "Outputting Puffin reports to disk", - feature: "export puffin reports", - issue_link: "https://github.com/meilisearch/product/discussions/693", - } - .into()) - } - } } impl FeatureData { diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index 04a5d9d6f..dda9dee51 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -6,7 +6,6 @@ pub struct RuntimeTogglableFeatures { pub vector_store: bool, pub metrics: bool, pub logs_route: bool, - pub export_puffin_reports: bool, } #[derive(Default, Debug, Clone, Copy)] diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 227b485c5..0e02309fa 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -47,8 +47,6 @@ pub struct RuntimeTogglableFeatures { pub metrics: Option, #[deserr(default)] pub logs_route: Option, - #[deserr(default)] - pub export_puffin_reports: Option, } async fn patch_features( @@ -68,21 +66,13 @@ async fn patch_features( vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store), metrics: new_features.0.metrics.unwrap_or(old_features.metrics), logs_route: new_features.0.logs_route.unwrap_or(old_features.logs_route), - export_puffin_reports: new_features - .0 - .export_puffin_reports - .unwrap_or(old_features.export_puffin_reports), }; // explicitly destructure for analytics rather than using the `Serialize` 
implementation, because // the it renames to camelCase, which we don't want for analytics. // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future. - let meilisearch_types::features::RuntimeTogglableFeatures { - vector_store, - metrics, - logs_route, - export_puffin_reports, - } = new_features; + let meilisearch_types::features::RuntimeTogglableFeatures { vector_store, metrics, logs_route } = + new_features; analytics.publish( "Experimental features Updated".to_string(), @@ -90,7 +80,6 @@ async fn patch_features( "vector_store": vector_store, "metrics": metrics, "logs_route": logs_route, - "export_puffin_reports": export_puffin_reports, }), Some(&req), ); From 487431a03538dc4132f8cfcff05959d4fc5e79c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 27 May 2024 16:12:20 +0200 Subject: [PATCH 56/56] Fix tests --- index-scheduler/src/insta_snapshot.rs | 1 - meilisearch/tests/dumps/mod.rs | 3 +-- meilisearch/tests/features/mod.rs | 20 +++++++------------- meilisearch/tests/search/hybrid.rs | 6 ++---- meilisearch/tests/settings/get_settings.rs | 3 +-- 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs index 988e75b81..d8625a2c7 100644 --- a/index-scheduler/src/insta_snapshot.rs +++ b/index-scheduler/src/insta_snapshot.rs @@ -32,7 +32,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String { features: _, max_number_of_tasks: _, max_number_of_batched_tasks: _, - puffin_frame: _, wake_up: _, dumps_path: _, snapshots_path: _, diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index 1a31437f8..c8f8ca105 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -1859,8 +1859,7 @@ async fn import_dump_v6_containing_experimental_features() { { "vectorStore": false, "metrics": false, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false 
} "###); diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs index 3a9812f30..9548567ff 100644 --- a/meilisearch/tests/features/mod.rs +++ b/meilisearch/tests/features/mod.rs @@ -20,8 +20,7 @@ async fn experimental_features() { { "vectorStore": false, "metrics": false, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false } "###); @@ -32,8 +31,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false } "###); @@ -44,8 +42,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false } "###); @@ -57,8 +54,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false } "###); @@ -70,8 +66,7 @@ async fn experimental_features() { { "vectorStore": true, "metrics": false, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false } "###); } @@ -90,8 +85,7 @@ async fn experimental_feature_metrics() { { "vectorStore": false, "metrics": true, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false } "###); @@ -146,7 +140,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `exportPuffinReports`", + "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 028b341cb..9c50df6e1 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -18,8 +18,7 @@ async fn 
index_with_documents_user_provided<'a>( { "vectorStore": true, "metrics": false, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false } "###); @@ -47,8 +46,7 @@ async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> I { "vectorStore": true, "metrics": false, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false } "###); diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index cd31d4959..379e0a917 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -98,8 +98,7 @@ async fn secrets_are_hidden_in_settings() { { "vectorStore": true, "metrics": false, - "logsRoute": false, - "exportPuffinReports": false + "logsRoute": false } "###);