From 0dd321afc70330f730c8bc89fef9964fa479c2fb Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 14 Nov 2024 10:02:51 +0100 Subject: [PATCH 001/158] reproduce #4984 --- crates/meilisearch/tests/search/mod.rs | 74 ++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs index d1091d944..1dc406fb3 100644 --- a/crates/meilisearch/tests/search/mod.rs +++ b/crates/meilisearch/tests/search/mod.rs @@ -15,6 +15,7 @@ mod pagination; mod restrict_searchable; mod search_queue; +use meili_snap::{json_string, snapshot}; use meilisearch::Opt; use tempfile::TempDir; @@ -62,6 +63,79 @@ async fn simple_search() { .await; } +#[actix_rt::test] +async fn search_with_stop_word() { + // related to https://github.com/meilisearch/meilisearch/issues/4984 + let server = Server::new().await; + let index = server.index("test"); + + let (_, code) = + index.update_settings(json!({"stopWords": ["the", "a", "an", "to", "in", "of"]})).await; + meili_snap::snapshot!(code, @"202 Accepted"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + // prefix search + index + .search(json!({"q": "to the", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "title": "How to Train Your Dragon: The Hidden World", + "_formatted": { + "title": "How to Train Your Dragon: The Hidden World" + } + } + ] + "###); + }) + .await; + + // non-prefix search + index + .search(json!({"q": "to the ", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "title": "Shazam!", + "_formatted": { + "title": "Shazam!" 
+ } + }, + { + "title": "Captain Marvel", + "_formatted": { + "title": "Captain Marvel" + } + }, + { + "title": "Escape Room", + "_formatted": { + "title": "Escape Room" + } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "_formatted": { + "title": "How to Train Your Dragon: The Hidden World" + } + }, + { + "title": "Gläss", + "_formatted": { + "title": "Gläss" + } + } + ] + "###); + }) + .await; +} + #[actix_rt::test] async fn phrase_search_with_stop_word() { // related to https://github.com/meilisearch/meilisearch/issues/3521 From 72ba35349887e64f0ae69079c8b27de21d141ba8 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 18 Nov 2024 10:03:23 +0100 Subject: [PATCH 002/158] reproduce sdk fail --- crates/meilisearch/tests/search/formatted.rs | 52 ++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/crates/meilisearch/tests/search/formatted.rs b/crates/meilisearch/tests/search/formatted.rs index 1484b6393..c549cd79d 100644 --- a/crates/meilisearch/tests/search/formatted.rs +++ b/crates/meilisearch/tests/search/formatted.rs @@ -4,6 +4,58 @@ use super::*; use crate::common::Server; use crate::json; +#[actix_rt::test] +async fn search_formatted_from_sdk() { + let server = Server::new_shared(); + let index = server.unique_index(); + + index + .update_settings( + json!({ "filterableAttributes": ["genre"], "searchableAttributes": ["title"] }), + ) + .await; + + let documents = json!([ + { "id": 123, "title": "Pride and Prejudice", "genre": "romance" }, + { "id": 456, "title": "Le Petit Prince", "genre": "adventure" }, + { "id": 1, "title": "Alice In Wonderland", "genre": "adventure" }, + { "id": 2, "title": "Le Rouge et le Noir", "genre": "romance" }, + { "id": 1344, "title": "The Hobbit", "genre": "adventure" }, + { "id": 4, "title": "Harry Potter and the Half-Blood Prince", "genre": "fantasy" }, + { "id": 7, "title": "Harry Potter and the Chamber of Secrets", "genre": "fantasy" }, + { "id": 42, "title": "The Hitchhiker's Guide to the Galaxy" } + ]); + let (response, _) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; + + index + .search( + json!({ "q":"prince", + "attributesToCrop": ["title"], + "cropLength": 2, + "filter": "genre = adventure", + "attributesToHighlight": ["title"], + "attributesToRetrieve": ["title"] + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + allow_duplicates! 
{ + assert_json_snapshot!(response["hits"][0], + { "._rankingScore" => "[score]" }, + @r###" + { + "title": "Le Petit Prince", + "_formatted": { + "title": "…Petit Prince" + } + } + "###); + } + }, + ) + .await; +} + #[actix_rt::test] async fn formatted_contain_wildcard() { let server = Server::new_shared(); From 3a8051866afc97af32575806a40adf8c3b9638a0 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 18 Nov 2024 11:12:36 +0100 Subject: [PATCH 003/158] Use `return_keyword_results` function instead of returning raw keyword results when the embedder is broken --- crates/milli/src/search/hybrid.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/milli/src/search/hybrid.rs b/crates/milli/src/search/hybrid.rs index 8b274804c..90833dfe9 100644 --- a/crates/milli/src/search/hybrid.rs +++ b/crates/milli/src/search/hybrid.rs @@ -205,7 +205,11 @@ impl<'a> Search<'a> { Ok(embedding) => embedding, Err(error) => { tracing::error!(error=%error, "Embedding failed"); - return Ok((keyword_results, Some(0))); + return Ok(return_keyword_results( + self.limit, + self.offset, + keyword_results, + )); } } } From cd796b0f4b3323c74190cc862bcd95ab3abec318 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 18 Nov 2024 11:46:00 +0100 Subject: [PATCH 004/158] Fix SDK test --- crates/milli/src/search/new/matches/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs index 80e3ec7b2..c1fb18cfa 100644 --- a/crates/milli/src/search/new/matches/mod.rs +++ b/crates/milli/src/search/new/matches/mod.rs @@ -268,7 +268,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { last_match_last_token_position_plus_one } else { // we have matched the end of possible tokens, there's nothing to advance - tokens.len() - 1 + tokens.len() } }; From e0c3f3d560acd3d1cc67f09656c5594c47e1603f Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 18 Nov 2024 16:08:53 +0100 Subject: [PATCH 005/158] Fix #4984 --- crates/meilisearch/tests/search/mod.rs | 16 ++++------------ .../extract/extract_docid_word_positions.rs | 8 ++++---- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs index 1dc406fb3..f3c11e451 100644 --- a/crates/meilisearch/tests/search/mod.rs +++ b/crates/meilisearch/tests/search/mod.rs @@ -69,8 +69,9 @@ async fn search_with_stop_word() { let server = Server::new().await; let index = server.index("test"); - let (_, code) = - index.update_settings(json!({"stopWords": ["the", "a", "an", "to", "in", "of"]})).await; + let (_, code) = index + .update_settings(json!({"stopWords": ["the", "The", "a", "an", "to", "in", "of"]})) + .await; meili_snap::snapshot!(code, @"202 Accepted"); let documents = DOCUMENTS.clone(); @@ -81,16 +82,7 @@ async fn search_with_stop_word() { index .search(json!({"q": "to the", "attributesToHighlight": ["title"], "attributesToRetrieve": ["title"] }), |response, code| { assert_eq!(code, 200, "{}", response); - snapshot!(json_string!(response["hits"]), @r###" - [ - { - "title": "How to Train Your Dragon: The Hidden World", - "_formatted": { - "title": "How to Train Your Dragon: The Hidden World" - } - } - ] - "###); + snapshot!(json_string!(response["hits"]), @"[]"); }) .await; diff --git a/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index ba11ceeb3..16ea92fa4 
100644 --- a/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -57,9 +57,9 @@ pub fn extract_docid_word_positions( .map(|s| s.iter().map(String::as_str).collect()); let old_dictionary: Option> = settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); - let del_builder = + let mut del_builder = tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref()); - let del_tokenizer = del_builder.into_tokenizer(); + let del_tokenizer = del_builder.build(); let new_stop_words = settings_diff.new.stop_words.as_ref(); let new_separators: Option> = settings_diff @@ -69,9 +69,9 @@ pub fn extract_docid_word_positions( .map(|s| s.iter().map(String::as_str).collect()); let new_dictionary: Option> = settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); - let add_builder = + let mut add_builder = tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref()); - let add_tokenizer = add_builder.into_tokenizer(); + let add_tokenizer = add_builder.build(); // iterate over documents. let mut cursor = obkv_documents.into_cursor()?; From 8924d486dba1d48be642cd58830cbf7a2f46c515 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 3 Oct 2024 12:04:59 +0200 Subject: [PATCH 006/158] Add a test reproducing the bug --- .../tests/search/restrict_searchable.rs | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/crates/meilisearch/tests/search/restrict_searchable.rs b/crates/meilisearch/tests/search/restrict_searchable.rs index ca659c518..abd13fadf 100644 --- a/crates/meilisearch/tests/search/restrict_searchable.rs +++ b/crates/meilisearch/tests/search/restrict_searchable.rs @@ -367,3 +367,50 @@ async fn search_on_exact_field() { }) .await; } + +#[actix_rt::test] +async fn phrase_search_on_title() { + let server = Server::new().await; + let documents = json!([ + { "id": 8, "desc": "Document Review", "title": "Document Review Specialist II" }, + { "id": 5, "desc": "Document Review", "title": "Document Review Attorney" }, + { "id": 4, "desc": "Document Review", "title": "Document Review Manager - Cyber Incident Response (Remote)" }, + { "id": 3, "desc": "Document Review", "title": "Document Review Paralegal" }, + { "id": 2, "desc": "Document Review", "title": "Document Controller (Saudi National)" }, + { "id": 1, "desc": "Document Review", "title": "Document Reviewer" }, + { "id": 7, "desc": "Document Review", "title": "Document Review Specialist II" }, + { "id": 6, "desc": "Document Review", "title": "Document Review (Entry Level)" } + ]); + let index = index_with_documents(&server, &documents).await; + + index + .search( + json!({"q": "\"Document Review\"", "attributesToSearchOn": ["title"], "attributesToRetrieve": ["title"]}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "title": "Document Review Specialist II" + }, + { + "title": "Document Review Attorney" + }, + { + "title": "Document Review Manager - Cyber Incident Response (Remote)" + }, + { + "title": "Document Review Paralegal" + }, + { + "title": "Document Review Specialist II" + }, + { + "title": "Document Review (Entry Level)" + } + ] + "###); + }, + ) + .await; +} From 510ca999962e898c042c08682b5186ff3a3b1e71 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 18 Nov 2024 12:28:03 +0100 Subject: [PATCH 007/158] Fixes #4974 --- 
crates/milli/src/search/new/resolve_query_graph.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/milli/src/search/new/resolve_query_graph.rs b/crates/milli/src/search/new/resolve_query_graph.rs index 7a47b0a66..4496f8c65 100644 --- a/crates/milli/src/search/new/resolve_query_graph.rs +++ b/crates/milli/src/search/new/resolve_query_graph.rs @@ -193,15 +193,23 @@ pub fn compute_phrase_docids( if words.is_empty() { return Ok(RoaringBitmap::new()); } - let mut candidates = RoaringBitmap::new(); + let mut candidates = None; for word in words.iter().flatten().copied() { if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? { - candidates |= word_docids; + if let Some(candidates) = candidates.as_mut() { + *candidates &= word_docids; + } else { + candidates = Some(word_docids); + } } else { return Ok(RoaringBitmap::new()); } } + let Some(mut candidates) = candidates else { + return Ok(RoaringBitmap::new()); + }; + let winsize = words.len().min(3); for win in words.windows(winsize) { From 25aac45fc7b1ceab292226c2d51a681adbd4b51f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 25 Nov 2024 15:54:43 +0100 Subject: [PATCH 008/158] Expose better error messages --- crates/milli/src/error.rs | 4 ++++ crates/milli/src/update/new/channel.rs | 27 +++++++++++++++++++++- crates/milli/src/update/new/indexer/mod.rs | 26 +++++++++++++++++---- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 6c60dcecc..4da57a3e1 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -61,6 +61,10 @@ pub enum InternalError { Serialization(#[from] SerializationError), #[error(transparent)] Store(#[from] MdbError), + #[error("Cannot delete {key:?} from database {database_name}: {error}")] + StoreDeletion { database_name: &'static str, key: Vec, error: heed::Error }, + #[error("Cannot insert {key:?} and value with length {value_length} into database {database_name}: {error}")] + StorePut { database_name: &'static str, key: Vec, value_length: usize, error: heed::Error }, #[error(transparent)] Utf8(#[from] str::Utf8Error), #[error("An indexation process was explicitly aborted")] diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 3afcd3e4b..dda87d515 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -11,7 +11,7 @@ use super::extract::FacetKind; use super::StdResult; use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY}; -use crate::index::IndexEmbeddingConfig; +use crate::index::{db_name, IndexEmbeddingConfig}; use crate::update::new::KvReaderFieldId; use crate::vector::Embedding; use crate::{DocumentId, Index}; @@ -139,6 +139,27 @@ impl Database { Database::FieldIdDocidFacetF64s => index.field_id_docid_facet_f64s.remap_types(), } } + + pub fn database_name(&self) -> &'static str { + match self { + Database::Main => db_name::MAIN, + Database::Documents => db_name::DOCUMENTS, + Database::ExternalDocumentsIds => db_name::EXTERNAL_DOCUMENTS_IDS, + Database::ExactWordDocids => db_name::EXACT_WORD_DOCIDS, + Database::WordDocids => db_name::WORD_DOCIDS, + Database::WordFidDocids => db_name::WORD_FIELD_ID_DOCIDS, + Database::WordPositionDocids => db_name::WORD_POSITION_DOCIDS, + Database::FidWordCountDocids => db_name::FIELD_ID_WORD_COUNT_DOCIDS, + 
Database::WordPairProximityDocids => db_name::WORD_PAIR_PROXIMITY_DOCIDS, + Database::FacetIdIsNullDocids => db_name::FACET_ID_IS_NULL_DOCIDS, + Database::FacetIdIsEmptyDocids => db_name::FACET_ID_IS_EMPTY_DOCIDS, + Database::FacetIdExistsDocids => db_name::FACET_ID_EXISTS_DOCIDS, + Database::FacetIdF64NumberDocids => db_name::FACET_ID_F64_DOCIDS, + Database::FacetIdStringDocids => db_name::FACET_ID_STRING_DOCIDS, + Database::FieldIdDocidFacetStrings => db_name::FIELD_ID_DOCID_FACET_STRINGS, + Database::FieldIdDocidFacetF64s => db_name::FIELD_ID_DOCID_FACET_F64S, + } + } } impl From for Database { @@ -158,6 +179,10 @@ impl DbOperation { self.database.database(index) } + pub fn database_name(&self) -> &'static str { + self.database.database_name() + } + pub fn entry(self) -> EntryOperation { self.entry } diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 01ac26503..0f533f5aa 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -41,7 +41,7 @@ use crate::update::settings::InnerIndexSettings; use crate::update::{FacetsUpdateBulk, GrenadParameters}; use crate::vector::{ArroyWrapper, EmbeddingConfigs, Embeddings}; use crate::{ - FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort, + Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder, UserError, }; @@ -356,13 +356,29 @@ where match operation { WriterOperation::DbOperation(db_operation) => { let database = db_operation.database(index); + let database_name = db_operation.database_name(); match db_operation.entry() { - EntryOperation::Delete(e) => { - if !database.delete(wtxn, e.entry())? { - unreachable!("We tried to delete an unknown key") + EntryOperation::Delete(e) => match database.delete(wtxn, e.entry()) { + Ok(false) => unreachable!("We tried to delete an unknown key"), + Ok(_) => (), + Err(error) => { + return Err(Error::InternalError(InternalError::StoreDeletion { + database_name, + key: e.entry().to_owned(), + error, + })); + } + }, + EntryOperation::Write(e) => { + if let Err(error) = database.put(wtxn, e.key(), e.value()) { + return Err(Error::InternalError(InternalError::StorePut { + database_name, + key: e.key().to_owned(), + value_length: e.value().len(), + error, + })); } } - EntryOperation::Write(e) => database.put(wtxn, e.key(), e.value())?, } } WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation { From a3103f347e3008247799506009eba19e6aa9171f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 25 Nov 2024 16:05:31 +0100 Subject: [PATCH 009/158] Fix the facet f64 database name --- crates/milli/src/update/new/channel.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index dda87d515..00b471b52 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -112,7 +112,7 @@ pub enum Database { FacetIdIsNullDocids, FacetIdIsEmptyDocids, FacetIdExistsDocids, - FacetIdF64NumberDocids, + FacetIdF64Docids, FacetIdStringDocids, FieldIdDocidFacetStrings, FieldIdDocidFacetF64s, @@ -133,7 +133,7 @@ impl Database { Database::FacetIdIsNullDocids => index.facet_id_is_null_docids.remap_types(), Database::FacetIdIsEmptyDocids => index.facet_id_is_empty_docids.remap_types(), Database::FacetIdExistsDocids => index.facet_id_exists_docids.remap_types(), - 
Database::FacetIdF64NumberDocids => index.facet_id_f64_docids.remap_types(), + Database::FacetIdF64Docids => index.facet_id_f64_docids.remap_types(), Database::FacetIdStringDocids => index.facet_id_string_docids.remap_types(), Database::FieldIdDocidFacetStrings => index.field_id_docid_facet_strings.remap_types(), Database::FieldIdDocidFacetF64s => index.field_id_docid_facet_f64s.remap_types(), @@ -154,7 +154,7 @@ impl Database { Database::FacetIdIsNullDocids => db_name::FACET_ID_IS_NULL_DOCIDS, Database::FacetIdIsEmptyDocids => db_name::FACET_ID_IS_EMPTY_DOCIDS, Database::FacetIdExistsDocids => db_name::FACET_ID_EXISTS_DOCIDS, - Database::FacetIdF64NumberDocids => db_name::FACET_ID_F64_DOCIDS, + Database::FacetIdF64Docids => db_name::FACET_ID_F64_DOCIDS, Database::FacetIdStringDocids => db_name::FACET_ID_STRING_DOCIDS, Database::FieldIdDocidFacetStrings => db_name::FIELD_ID_DOCID_FACET_STRINGS, Database::FieldIdDocidFacetF64s => db_name::FIELD_ID_DOCID_FACET_F64S, @@ -165,7 +165,7 @@ impl Database { impl From for Database { fn from(value: FacetKind) -> Self { match value { - FacetKind::Number => Database::FacetIdF64NumberDocids, + FacetKind::Number => Database::FacetIdF64Docids, FacetKind::String => Database::FacetIdStringDocids, FacetKind::Null => Database::FacetIdIsNullDocids, FacetKind::Empty => Database::FacetIdIsEmptyDocids, From 5606679c53a507b3778b957afe8d1b16e865a2d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 25 Nov 2024 16:24:59 +0100 Subject: [PATCH 010/158] Use the obkv and grenad crates.io versions --- Cargo.lock | 9 +++++---- crates/meilisearch/Cargo.toml | 2 +- crates/milli/Cargo.toml | 7 ++----- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2de9007f5..d94ff0804 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2263,13 +2263,13 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "grenad" -version = "0.4.7" -source = "git+https://github.com/meilisearch/grenad?branch=various-improvements#58ac87d852413571102f44c5e55ca13509a3f1a0" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e2ac9baf835ee2a7f0622a5617792ced6f65af25994078c343d429431ef2bbc" dependencies = [ "bytemuck", "byteorder", "either", - "rayon", "tempfile", ] @@ -3912,7 +3912,8 @@ dependencies = [ [[package]] name = "obkv" version = "0.3.0" -source = "git+https://github.com/kerollmops/obkv?branch=unsized-kvreader#ce535874008ecac554f02e0c670e6caf62134d6b" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae4512a8f418ac322335255a72361b9ac927e106f4d7fe6ab4d8ac59cb01f7a9" [[package]] name = "once_cell" diff --git a/crates/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml index b11d90151..2884f0c9c 100644 --- a/crates/meilisearch/Cargo.toml +++ b/crates/meilisearch/Cargo.toml @@ -57,7 +57,7 @@ meilisearch-types = { path = "../meilisearch-types" } mimalloc = { version = "0.1.43", default-features = false } mime = "0.3.17" num_cpus = "1.16.0" -obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" } +obkv = "0.3.0" once_cell = "1.19.0" ordered-float = "4.2.1" parking_lot = "0.12.3" diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index c47a0a354..ccf6877cd 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -28,10 +28,7 @@ flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" -grenad = { version = "0.4.7", 
default-features = false, features = [ - "rayon", # TODO Should we keep this feature - "tempfile", -], git = "https://github.com/meilisearch/grenad", branch = "various-improvements" } +grenad = { version = "0.5.0", default-features = false, features = ["tempfile"] } heed = { version = "0.20.3", default-features = false, features = [ "serde-json", "serde-bincode", @@ -42,7 +39,7 @@ json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } memchr = "2.5.0" memmap2 = "0.9.4" -obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" } +obkv = "0.3.0" once_cell = "1.19.0" ordered-float = "4.2.1" rayon = "1.10.0" From b4fb2dabd46f40c6ae8f320a77ba4d9342cfefbd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 25 Nov 2024 16:31:21 +0100 Subject: [PATCH 011/158] Use the grenad rayon feature --- Cargo.lock | 1 + crates/milli/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index d94ff0804..0f2a13125 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2270,6 +2270,7 @@ dependencies = [ "bytemuck", "byteorder", "either", + "rayon", "tempfile", ] diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index ccf6877cd..1a3bfbcf1 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -28,7 +28,7 @@ flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" -grenad = { version = "0.5.0", default-features = false, features = ["tempfile"] } +grenad = { version = "0.5.0", default-features = false, features = ["rayon", "tempfile"] } heed = { version = "0.20.3", default-features = false, features = [ "serde-json", "serde-bincode", From d66dc363ed5cf3e7c1a7a59c05bed1722338e0ac Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 19 Nov 2024 15:57:56 +0100 Subject: [PATCH 012/158] Test and implement settings opt-out --- crates/dump/src/lib.rs | 2 + crates/dump/src/reader/compat/v5_to_v6.rs | 2 + .../after_adding_the_documents.snap | 3 +- .../after_adding_the_settings.snap | 3 +- .../after_removing_the_documents.snap | 3 +- .../registered_the_document_deletions.snap | 3 +- ...red_the_setting_and_document_addition.snap | 3 +- .../Intel to kefir succeeds.snap | 5 +- .../lib.rs/import_vectors/Intel to kefir.snap | 5 +- .../import_vectors/adding Intel succeeds.snap | 5 +- .../import_vectors/after adding Intel.snap | 3 +- ...ter_registering_settings_task_vectors.snap | 3 +- .../settings_update_processed_vectors.snap | 3 +- .../after_registering_settings_task.snap | 3 +- .../settings_update_processed.snap | 3 +- crates/meilisearch-types/src/error.rs | 2 + crates/meilisearch-types/src/settings.rs | 71 ++- .../src/routes/indexes/settings.rs | 26 + .../src/routes/indexes/settings_analytics.rs | 45 +- crates/meilisearch/tests/dumps/mod.rs | 56 ++- .../meilisearch/tests/search/facet_search.rs | 112 +++++ .../tests/settings/get_settings.rs | 8 +- crates/meilisearch/tests/settings/mod.rs | 1 + .../tests/settings/prefix_search_settings.rs | 458 ++++++++++++++++++ crates/milli/src/index.rs | 61 ++- crates/milli/src/search/new/mod.rs | 9 + .../src/search/new/query_term/parse_query.rs | 3 +- crates/milli/src/update/facet/mod.rs | 7 + .../extract/extract_facet_string_docids.rs | 31 +- .../extract/extract_fid_docid_facet_values.rs | 8 +- .../milli/src/update/index_documents/mod.rs | 35 +- .../src/update/index_documents/transform.rs | 25 +- crates/milli/src/update/new/indexer/mod.rs | 5 +- 
.../milli/src/update/new/word_fst_builder.rs | 6 +- crates/milli/src/update/settings.rs | 86 +++- crates/milli/src/update/words_prefixes_fst.rs | 8 +- 36 files changed, 1018 insertions(+), 94 deletions(-) create mode 100644 crates/meilisearch/tests/settings/prefix_search_settings.rs diff --git a/crates/dump/src/lib.rs b/crates/dump/src/lib.rs index 8bed7f0d4..31cd3028e 100644 --- a/crates/dump/src/lib.rs +++ b/crates/dump/src/lib.rs @@ -292,6 +292,8 @@ pub(crate) mod test { embedders: Setting::NotSet, search_cutoff_ms: Setting::NotSet, localized_attributes: Setting::NotSet, + facet_search: Setting::NotSet, + prefix_search: Setting::NotSet, _kind: std::marker::PhantomData, }; settings.check() diff --git a/crates/dump/src/reader/compat/v5_to_v6.rs b/crates/dump/src/reader/compat/v5_to_v6.rs index 785542cce..6b2655bdf 100644 --- a/crates/dump/src/reader/compat/v5_to_v6.rs +++ b/crates/dump/src/reader/compat/v5_to_v6.rs @@ -382,6 +382,8 @@ impl From> for v6::Settings { embedders: v6::Setting::NotSet, localized_attributes: v6::Setting::NotSet, search_cutoff_ms: v6::Setting::NotSet, + facet_search: v6::Setting::NotSet, + prefix_search: v6::Setting::NotSet, _kind: std::marker::PhantomData, } } diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap index 8d175e388..bda90680f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, 
dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap index d1de7ec61..be79abf21 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), 
filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap index 114df2852..492eae3dd 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 
0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} 3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap index b2b368be4..43be57779 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, 
pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} 3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap index 9e1995fee..ca1866473 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] 
---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap index 11995b0bd..f581defa8 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -1,13 +1,12 @@ --- -source: crates/crates/index-scheduler/src/lib.rs -snapshot_kind: text +source: crates/index-scheduler/src/lib.rs --- ### 
Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: 
Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap index 9c028d141..27522376f 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -1,13 +1,12 @@ --- -source: crates/crates/index-scheduler/src/lib.rs 
-snapshot_kind: text +source: crates/index-scheduler/src/lib.rs --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: 
NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap index 5c83f6cac..28504ffea 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ 
-1,13 +1,12 @@ --- -source: crates/crates/index-scheduler/src/lib.rs -snapshot_kind: text +source: crates/index-scheduler/src/lib.rs --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, 
proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap index c8f174c74..288f2bc88 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] 
---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My 
super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap index f9e6df03e..ff63c0caf 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), 
searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap index 24d5fff27..77367f06b 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), 
dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet 
})}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap index 22900371e..e2668fcea 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), 
filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap index dae9b38cd..7f08c0575 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/lib.rs -snapshot_kind: text --- ### Autobatching Enabled = true 
### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: 
NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index 00f88b7b4..4b930bf8d 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -290,6 +290,8 @@ InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; +InvalidSettingsFacetSearch , InvalidRequest , BAD_REQUEST ; +InvalidSettingsPrefixSearch , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; diff --git a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index e3803fa28..48481e364 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -8,7 +8,7 @@ use std::str::FromStr; use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef}; use fst::IntoStreamer; -use milli::index::IndexEmbeddingConfig; +use milli::index::{IndexEmbeddingConfig, PrefixSearch}; use milli::proximity::ProximityPrecision; use milli::update::Setting; use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET}; @@ -202,6 +202,12 @@ pub struct Settings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] pub localized_attributes: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default, error = DeserrJsonError)] + pub facet_search: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default, error = DeserrJsonError)] + pub prefix_search: Setting, #[serde(skip)] #[deserr(skip)] @@ -266,6 +272,8 @@ impl Settings { embedders: Setting::Reset, search_cutoff_ms: Setting::Reset, localized_attributes: Setting::Reset, + facet_search: Setting::NotSet, + prefix_search: Setting::NotSet, _kind: PhantomData, } } @@ -290,6 +298,8 @@ impl Settings { embedders, search_cutoff_ms, localized_attributes: localized_attributes_rules, + facet_search, + prefix_search, _kind, } = self; @@ -312,6 +322,8 @@ impl Settings { embedders, search_cutoff_ms, localized_attributes: localized_attributes_rules, + facet_search, + prefix_search, _kind: PhantomData, } } @@ -360,6 +372,8 @@ impl Settings { embedders: self.embedders, search_cutoff_ms: self.search_cutoff_ms, localized_attributes: self.localized_attributes, + facet_search: self.facet_search, + prefix_search: self.prefix_search, _kind: PhantomData, } } @@ -433,6 +447,8 @@ impl Settings { Setting::Set(this) } }, + prefix_search: other.prefix_search.or(self.prefix_search), + facet_search: 
other.facet_search.or(self.facet_search), _kind: PhantomData, } } @@ -469,6 +485,8 @@ pub fn apply_settings_to_builder( embedders, search_cutoff_ms, localized_attributes: localized_attributes_rules, + facet_search, + prefix_search, _kind, } = settings; @@ -657,6 +675,20 @@ pub fn apply_settings_to_builder( Setting::Reset => builder.reset_search_cutoff(), Setting::NotSet => (), } + + match prefix_search { + Setting::Set(prefix_search) => { + builder.set_prefix_search(PrefixSearch::from(*prefix_search)) + } + Setting::Reset => builder.reset_prefix_search(), + Setting::NotSet => (), + } + + match facet_search { + Setting::Set(facet_search) => builder.set_facet_search(*facet_search), + Setting::Reset => builder.reset_facet_search(), + Setting::NotSet => (), + } } pub enum SecretPolicy { @@ -755,6 +787,10 @@ pub fn settings( let localized_attributes_rules = index.localized_attributes_rules(rtxn)?; + let prefix_search = index.prefix_search(rtxn)?.map(PrefixSearchSettings::from); + + let facet_search = index.facet_search(rtxn)?; + let mut settings = Settings { displayed_attributes: match displayed_attributes { Some(attrs) => Setting::Set(attrs), @@ -791,13 +827,14 @@ pub fn settings( Some(rules) => Setting::Set(rules.into_iter().map(|r| r.into()).collect()), None => Setting::Reset, }, + prefix_search: Setting::Set(prefix_search.unwrap_or_default()), + facet_search: Setting::Set(facet_search), _kind: PhantomData, }; if let SecretPolicy::HideSecrets = secret_policy { settings.hide_secrets() } - Ok(settings) } @@ -964,6 +1001,32 @@ impl std::ops::Deref for WildcardSetting { } } +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +pub enum PrefixSearchSettings { + #[default] + IndexingTime, + Disabled, +} + +impl From for PrefixSearchSettings { + fn from(value: PrefixSearch) -> Self { + match value { + PrefixSearch::IndexingTime => PrefixSearchSettings::IndexingTime, + PrefixSearch::Disabled => PrefixSearchSettings::Disabled, + } + } +} +impl From for PrefixSearch { + fn from(value: PrefixSearchSettings) -> Self { + match value { + PrefixSearchSettings::IndexingTime => PrefixSearch::IndexingTime, + PrefixSearchSettings::Disabled => PrefixSearch::Disabled, + } + } +} + #[cfg(test)] pub(crate) mod test { use super::*; @@ -990,6 +1053,8 @@ pub(crate) mod test { embedders: Setting::NotSet, localized_attributes: Setting::NotSet, search_cutoff_ms: Setting::NotSet, + facet_search: Setting::NotSet, + prefix_search: Setting::NotSet, _kind: PhantomData::, }; @@ -1019,6 +1084,8 @@ pub(crate) mod test { embedders: Setting::NotSet, localized_attributes: Setting::NotSet, search_cutoff_ms: Setting::NotSet, + facet_search: Setting::NotSet, + prefix_search: Setting::NotSet, _kind: PhantomData::, }; diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index a9d8d3053..e1794535b 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -369,6 +369,30 @@ make_setting_route!( SearchCutoffMsAnalytics ); +make_setting_route!( + "/facet-search", + put, + bool, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsFacetSearch, + >, + facet_search, + "facetSearch", + FacetSearchAnalytics +); + +make_setting_route!( + "/prefix-search", + put, + 
meilisearch_types::settings::PrefixSearchSettings, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsPrefixSearch, + >, + prefix_search, + "prefixSearch", + PrefixSearchAnalytics +); + macro_rules! generate_configure { ($($mod:ident),*) => { pub fn configure(cfg: &mut web::ServiceConfig) { @@ -456,6 +480,8 @@ pub async fn update_all( non_separator_tokens: NonSeparatorTokensAnalytics::new( new_settings.non_separator_tokens.as_ref().set(), ), + facet_search: FacetSearchAnalytics::new(new_settings.facet_search.as_ref().set()), + prefix_search: PrefixSearchAnalytics::new(new_settings.prefix_search.as_ref().set()), }, &req, ); diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs index 32bddcbdd..ddca2c00a 100644 --- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -10,7 +10,8 @@ use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; use meilisearch_types::milli::update::Setting; use meilisearch_types::milli::vector::settings::EmbeddingSettings; use meilisearch_types::settings::{ - FacetingSettings, PaginationSettings, ProximityPrecisionView, RankingRuleView, TypoSettings, + FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView, + RankingRuleView, TypoSettings, }; use serde::Serialize; @@ -36,6 +37,8 @@ pub struct SettingsAnalytics { pub dictionary: DictionaryAnalytics, pub separator_tokens: SeparatorTokensAnalytics, pub non_separator_tokens: NonSeparatorTokensAnalytics, + pub facet_search: FacetSearchAnalytics, + pub prefix_search: PrefixSearchAnalytics, } impl Aggregate for SettingsAnalytics { @@ -183,6 +186,14 @@ impl Aggregate for SettingsAnalytics { non_separator_tokens: NonSeparatorTokensAnalytics { total: new.non_separator_tokens.total.or(self.non_separator_tokens.total), }, + facet_search: FacetSearchAnalytics { + set: new.facet_search.set | self.facet_search.set, + value: new.facet_search.value.or(self.facet_search.value), + }, + prefix_search: PrefixSearchAnalytics { + set: new.prefix_search.set | self.prefix_search.set, + value: new.prefix_search.value.or(self.prefix_search.value), + }, }) } @@ -620,3 +631,35 @@ impl NonSeparatorTokensAnalytics { SettingsAnalytics { non_separator_tokens: self, ..Default::default() } } } + +#[derive(Serialize, Default)] +pub struct FacetSearchAnalytics { + pub set: bool, + pub value: Option, +} + +impl FacetSearchAnalytics { + pub fn new(settings: Option<&bool>) -> Self { + Self { set: settings.is_some(), value: settings.copied() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { facet_search: self, ..Default::default() } + } +} + +#[derive(Serialize, Default)] +pub struct PrefixSearchAnalytics { + pub set: bool, + pub value: Option, +} + +impl PrefixSearchAnalytics { + pub fn new(settings: Option<&PrefixSearchSettings>) -> Self { + Self { set: settings.is_some(), value: settings.cloned() } + } + + pub fn into_settings(self) -> SettingsAnalytics { + SettingsAnalytics { prefix_search: self, ..Default::default() } + } +} diff --git a/crates/meilisearch/tests/dumps/mod.rs b/crates/meilisearch/tests/dumps/mod.rs index c7d157b00..dbbd1abf0 100644 --- a/crates/meilisearch/tests/dumps/mod.rs +++ b/crates/meilisearch/tests/dumps/mod.rs @@ -79,7 +79,9 @@ async fn import_dump_v1_movie_raw() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + 
"localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -242,7 +244,9 @@ async fn import_dump_v1_movie_with_settings() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -391,7 +395,9 @@ async fn import_dump_v1_rubygems_with_settings() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -526,7 +532,9 @@ async fn import_dump_v2_movie_raw() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -673,7 +681,9 @@ async fn import_dump_v2_movie_with_settings() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -819,7 +829,9 @@ async fn import_dump_v2_rubygems_with_settings() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -954,7 +966,9 @@ async fn import_dump_v3_movie_raw() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -1101,7 +1115,9 @@ async fn import_dump_v3_movie_with_settings() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -1247,7 +1263,9 @@ async fn import_dump_v3_rubygems_with_settings() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -1382,7 +1400,9 @@ async fn import_dump_v4_movie_raw() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -1529,7 +1549,9 @@ async fn import_dump_v4_movie_with_settings() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -1675,7 +1697,9 @@ async fn import_dump_v4_rubygems_with_settings() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "### ); @@ -1922,7 +1946,9 @@ async fn import_dump_v6_containing_experimental_features() { "maxTotalHits": 1000 }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "###); @@ -2102,7 +2128,9 @@ async fn generate_and_import_dump_containing_vectors() { } }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "###); diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 12d2226a9..52b8171c4 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -200,3 +200,115 @@ async fn 
simple_facet_search_with_sort_by_count() { assert_eq!(hits[0], json!({ "value": "Action", "count": 3 })); assert_eq!(hits[1], json!({ "value": "Adventure", "count": 2 })); } + +#[actix_rt::test] +async fn add_documents_and_deactivate_facet_search() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + let (response, code) = index + .update_settings(json!({ + "facetSearch": false, + "filterableAttributes": ["genres"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(1).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0); +} + +#[actix_rt::test] +async fn deactivate_facet_search_and_add_documents() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index + .update_settings(json!({ + "facetSearch": false, + "filterableAttributes": ["genres"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(0).await; + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0); +} + +#[actix_rt::test] +async fn deactivate_facet_search_add_documents_and_activate_facet_search() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index + .update_settings(json!({ + "facetSearch": false, + "filterableAttributes": ["genres"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(0).await; + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = index + .update_settings(json!({ + "facetSearch": true, + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(2).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2); +} + +#[actix_rt::test] +async fn deactivate_facet_search_add_documents_and_reset_facet_search() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index + .update_settings(json!({ + "facetSearch": false, + "filterableAttributes": ["genres"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(0).await; + let documents = DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = index + .update_settings(json!({ + "facetSearch": serde_json::Value::Null, + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(2).await; + + let (response, code) = + index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; + + assert_eq!(code, 200, "{}", response); + assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2); +} diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index 6de0db0b3..1b1964680 
100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -56,7 +56,7 @@ async fn get_settings() { let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); - assert_eq!(settings.keys().len(), 17); + assert_eq!(settings.keys().len(), 19); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["filterableAttributes"], json!([])); @@ -87,6 +87,8 @@ async fn get_settings() { ); assert_eq!(settings["proximityPrecision"], json!("byWord")); assert_eq!(settings["searchCutoffMs"], json!(null)); + assert_eq!(settings["prefixSearch"], json!("indexingTime")); + assert_eq!(settings["facetSearch"], json!(true)); } #[actix_rt::test] @@ -199,7 +201,9 @@ async fn secrets_are_hidden_in_settings() { } }, "searchCutoffMs": null, - "localizedAttributes": null + "localizedAttributes": null, + "facetSearch": true, + "prefixSearch": "indexingTime" } "###); diff --git a/crates/meilisearch/tests/settings/mod.rs b/crates/meilisearch/tests/settings/mod.rs index ccb4139e6..67df4068a 100644 --- a/crates/meilisearch/tests/settings/mod.rs +++ b/crates/meilisearch/tests/settings/mod.rs @@ -1,5 +1,6 @@ mod distinct; mod errors; mod get_settings; +mod prefix_search_settings; mod proximity_settings; mod tokenizer_customization; diff --git a/crates/meilisearch/tests/settings/prefix_search_settings.rs b/crates/meilisearch/tests/settings/prefix_search_settings.rs new file mode 100644 index 000000000..34a891f97 --- /dev/null +++ b/crates/meilisearch/tests/settings/prefix_search_settings.rs @@ -0,0 +1,458 @@ +use meili_snap::{json_string, snapshot}; +use once_cell::sync::Lazy; + +use crate::common::Server; +use crate::json; + +static DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "id": 1, + "a": "Soup of the day", + "b": "manythefishou", + }, + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + }, + { + "id": 3, + "a": "the Soup of day", + "b": "manythelazyfish", + }, + ]) +}); + +#[actix_rt::test] +async fn add_docs_and_disable() { + let server = Server::new().await; + let index = server.index("test"); + + index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(0).await; + + let (response, code) = index + .update_settings(json!({ + "prefixSearch": "disabled", + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(1).await; + + // only 1 document should match + index + .search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + } + ] + "###); + }) + .await; + + // only 1 document should match + index + .search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + } + ] + "###); + }) + .await; +} + +#[actix_rt::test] +async fn disable_and_add_docs() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index + 
.update_settings(json!({ + "prefixSearch": "disabled", + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(0).await; + + index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(1).await; + + // only 1 document should match + index + .search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + } + ] + "###); + }) + .await; +} + +#[actix_rt::test] +async fn disable_add_docs_and_enable() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index + .update_settings(json!({ + "prefixSearch": "disabled", + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(0).await; + + index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(1).await; + + let (response, code) = index + .update_settings(json!({ + "prefixSearch": "indexingTime", + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(2).await; + + // all documents should match + index + .search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "manythefishou", + "_formatted": { + "id": "1", + "a": "Soup of the day", + "b": "manythefishou" + } + }, + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + }, + { + "id": 3, + "a": "the Soup of day", + "b": "manythelazyfish", + "_formatted": { + "id": "3", + "a": "the Soup of day", + "b": "manythelazyfish" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "manythefishou", + "_formatted": { + "id": "1", + "a": "Soup of the day", + "b": "manythefishou" + } + }, + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + }, + { + "id": 3, + "a": "the Soup of day", + "b": "manythelazyfish", + "_formatted": { + "id": "3", + "a": "the Soup of day", + "b": "manythelazyfish" + } + } + ] + "###); + }) + .await; +} + +#[actix_rt::test] +async fn disable_add_docs_and_reset() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index + .update_settings(json!({ + "prefixSearch": "disabled", + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", 
response); + index.wait_task(0).await; + + index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(1).await; + + let (response, code) = index + .update_settings(json!({ + "prefixSearch": serde_json::Value::Null, + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(2).await; + + // all documents should match + index + .search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "manythefishou", + "_formatted": { + "id": "1", + "a": "Soup of the day", + "b": "manythefishou" + } + }, + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + }, + { + "id": 3, + "a": "the Soup of day", + "b": "manythelazyfish", + "_formatted": { + "id": "3", + "a": "the Soup of day", + "b": "manythelazyfish" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "manythefishou", + "_formatted": { + "id": "1", + "a": "Soup of the day", + "b": "manythefishou" + } + }, + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + }, + { + "id": 3, + "a": "the Soup of day", + "b": "manythelazyfish", + "_formatted": { + "id": "3", + "a": "the Soup of day", + "b": "manythelazyfish" + } + } + ] + "###); + }) + .await; +} + +#[actix_rt::test] +async fn default_behavior() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index + .update_settings(json!({ + "rankingRules": ["words", "typo", "proximity"], + })) + .await; + assert_eq!("202", code.as_str(), "{:?}", response); + index.wait_task(0).await; + + index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(1).await; + + // all documents should match + index + .search(json!({"q": "so", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "manythefishou", + "_formatted": { + "id": "1", + "a": "Soup of the day", + "b": "manythefishou" + } + }, + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + }, + { + "id": 3, + "a": "the Soup of day", + "b": "manythelazyfish", + "_formatted": { + "id": "3", + "a": "the Soup of day", + "b": "manythelazyfish" + } + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "manythe", "attributesToHighlight": ["a", "b"]}), |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "a": "Soup of the day", + "b": "manythefishou", + "_formatted": { + "id": "1", + "a": "Soup of the day", + "b": "manythefishou" + } + }, + { + "id": 2, + "a": "Soup of day so", + "b": "manythe manythelazyfish", + "_formatted": { + "id": "2", + "a": "Soup of day so", + "b": "manythe manythelazyfish" + } + }, + { + "id": 3, + "a": "the Soup of day", + "b": "manythelazyfish", + "_formatted": { + "id": "3", + "a": 
"the Soup of day", + "b": "manythelazyfish" + } + } + ] + "###); + }) + .await; +} diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 89f965b7c..5bd24b9e4 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -70,6 +70,8 @@ pub mod main_key { pub const EMBEDDING_CONFIGS: &str = "embedding_configs"; pub const SEARCH_CUTOFF: &str = "search_cutoff"; pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules"; + pub const FACET_SEARCH: &str = "facet_search"; + pub const PREFIX_SEARCH: &str = "prefix_search"; } pub mod db_name { @@ -1233,6 +1235,10 @@ impl Index { ) } + pub(crate) fn delete_words_prefixes_fst(&self, wtxn: &mut RwTxn<'_>) -> heed::Result { + self.main.remap_key_type::().delete(wtxn, main_key::WORDS_PREFIXES_FST_KEY) + } + /// Returns the FST which is the words prefixes dictionary of the engine. pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result>> { match self.main.remap_types::().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? { @@ -1562,6 +1568,41 @@ impl Index { self.main.remap_key_type::().delete(txn, main_key::PROXIMITY_PRECISION) } + pub fn prefix_search(&self, txn: &RoTxn<'_>) -> heed::Result> { + self.main.remap_types::>().get(txn, main_key::PREFIX_SEARCH) + } + + pub(crate) fn put_prefix_search( + &self, + txn: &mut RwTxn<'_>, + val: PrefixSearch, + ) -> heed::Result<()> { + self.main.remap_types::>().put( + txn, + main_key::PREFIX_SEARCH, + &val, + ) + } + + pub(crate) fn delete_prefix_search(&self, txn: &mut RwTxn<'_>) -> heed::Result { + self.main.remap_key_type::().delete(txn, main_key::PREFIX_SEARCH) + } + + pub fn facet_search(&self, txn: &RoTxn<'_>) -> heed::Result { + self.main + .remap_types::>() + .get(txn, main_key::FACET_SEARCH) + .map(|v| v.unwrap_or(true)) + } + + pub(crate) fn put_facet_search(&self, txn: &mut RwTxn<'_>, val: bool) -> heed::Result<()> { + self.main.remap_types::>().put(txn, main_key::FACET_SEARCH, &val) + } + + pub(crate) fn delete_facet_search(&self, txn: &mut RwTxn<'_>) -> heed::Result { + self.main.remap_key_type::().delete(txn, main_key::FACET_SEARCH) + } + pub fn localized_attributes_rules( &self, rtxn: &RoTxn<'_>, @@ -1647,10 +1688,14 @@ impl Index { Ok(res) } - pub fn prefix_settings(&self, _rtxn: &RoTxn<'_>) -> Result { + pub fn prefix_settings(&self, rtxn: &RoTxn<'_>) -> Result { + let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default(); Ok(PrefixSettings { - compute_prefixes: true, + compute_prefixes, max_prefix_length: 4, + #[cfg(not(test))] + prefix_count_threshold: 100, + #[cfg(test)] prefix_count_threshold: 100, }) } @@ -1665,9 +1710,17 @@ pub struct IndexEmbeddingConfig { #[derive(Debug, Deserialize, Serialize)] pub struct PrefixSettings { - pub prefix_count_threshold: u64, + pub prefix_count_threshold: usize, pub max_prefix_length: usize, - pub compute_prefixes: bool, + pub compute_prefixes: PrefixSearch, +} + +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] +#[serde(rename_all = "camelCase")] +pub enum PrefixSearch { + #[default] + IndexingTime, + Disabled, } #[derive(Serialize, Deserialize)] diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index f7c590360..4edcd09de 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -49,6 +49,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy; use self::graph_based_ranking_rule::Words; use self::interner::Interned; use self::vector_sort::VectorSort; +use crate::index::PrefixSearch; use 
crate::localized_attributes_rules::LocalizedFieldIds; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; @@ -68,6 +69,7 @@ pub struct SearchContext<'ctx> { pub term_interner: Interner, pub phrase_docids: PhraseDocIdsCache, pub restricted_fids: Option, + pub prefix_search: PrefixSearch, } impl<'ctx> SearchContext<'ctx> { @@ -85,6 +87,8 @@ impl<'ctx> SearchContext<'ctx> { } } + let prefix_search = index.prefix_search(txn)?.unwrap_or_default(); + Ok(Self { index, txn, @@ -94,9 +98,14 @@ impl<'ctx> SearchContext<'ctx> { term_interner: <_>::default(), phrase_docids: <_>::default(), restricted_fids: None, + prefix_search, }) } + pub fn is_prefix_search_allowed(&self) -> bool { + self.prefix_search != PrefixSearch::Disabled + } + pub fn attributes_to_search_on( &mut self, attributes_to_search_on: &'ctx [String], diff --git a/crates/milli/src/search/new/query_term/parse_query.rs b/crates/milli/src/search/new/query_term/parse_query.rs index bb98f19ce..a76fd6525 100644 --- a/crates/milli/src/search/new/query_term/parse_query.rs +++ b/crates/milli/src/search/new/query_term/parse_query.rs @@ -28,6 +28,7 @@ pub fn located_query_terms_from_tokens( words_limit: Option, ) -> Result { let nbr_typos = number_of_typos_allowed(ctx)?; + let allow_prefix_search = ctx.is_prefix_search_allowed(); let mut query_terms = Vec::new(); @@ -94,7 +95,7 @@ pub fn located_query_terms_from_tokens( ctx, word, nbr_typos(word), - true, + allow_prefix_search, false, )?; let located_term = LocatedQueryTerm { diff --git a/crates/milli/src/update/facet/mod.rs b/crates/milli/src/update/facet/mod.rs index 2e592519b..f4835e6a8 100644 --- a/crates/milli/src/update/facet/mod.rs +++ b/crates/milli/src/update/facet/mod.rs @@ -173,6 +173,13 @@ impl<'i> FacetsUpdate<'i> { } match self.normalized_delta_data { + _ if !self.index.facet_search(wtxn)? => { + // If facet search is disabled, we don't need to compute facet search databases. + // We clear the facet search databases. 
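+            // These two databases only back the facet-search feature, so dropping them here should be
+            // safe: the facet strings are re-extracted and the databases rebuilt if facet search is re-enabled.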
+ self.index.facet_id_string_fst.clear(wtxn)?; + self.index.facet_id_normalized_string_strings.clear(wtxn)?; + return Ok(()); + } Some(data) => index_facet_search(wtxn, data, self.index), None => Ok(()), } diff --git a/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index e0d7e1386..d330ea5a0 100644 --- a/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -34,10 +34,12 @@ pub fn extract_facet_string_docids( extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff) } else { let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids; + let facet_search = settings_diff.new.facet_search; extract_facet_string_docids_document_update( docid_fid_facet_string, indexer, localized_field_ids, + facet_search, ) } } @@ -51,6 +53,7 @@ fn extract_facet_string_docids_document_update( docid_fid_facet_string: grenad::Reader, indexer: GrenadParameters, localized_field_ids: &LocalizedFieldIds, + facet_search: bool, ) -> Result<(grenad::Reader>, grenad::Reader>)> { let max_memory = indexer.max_memory_by_thread(); @@ -96,7 +99,7 @@ fn extract_facet_string_docids_document_update( let normalized_value = str::from_utf8(normalized_value_bytes)?; // Facet search normalization - { + if facet_search { let locales = localized_field_ids.locales(field_id); let hyper_normalized_value = normalize_facet_string(normalized_value, locales); @@ -179,8 +182,10 @@ fn extract_facet_string_docids_settings( let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id); let are_same_locales = old_locales == new_locales; + let reindex_facet_search = + settings_diff.new.facet_search && !settings_diff.old.facet_search; - if is_same_value && are_same_locales { + if is_same_value && are_same_locales && !reindex_facet_search { continue; } @@ -191,18 +196,26 @@ fn extract_facet_string_docids_settings( let normalized_value = str::from_utf8(normalized_value_bytes)?; // Facet search normalization - { - let old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales); - let new_hyper_normalized_value = if are_same_locales { - &old_hyper_normalized_value + if settings_diff.new.facet_search { + let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales); + let old_hyper_normalized_value; + let old_hyper_normalized_value = if !settings_diff.old.facet_search + || deladd_reader.get(DelAdd::Deletion).is_none() + { + // if the facet search is disabled in the old settings or if no facet string is deleted, + // we don't need to normalize the facet string. + None + } else if are_same_locales { + Some(&new_hyper_normalized_value) } else { - &normalize_facet_string(normalized_value, new_locales) + old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales); + Some(&old_hyper_normalized_value) }; let set = BTreeSet::from_iter(std::iter::once(normalized_value)); // if the facet string is the same, we can put the deletion and addition in the same obkv. - if old_hyper_normalized_value == new_hyper_normalized_value.as_str() { + if old_hyper_normalized_value == Some(&new_hyper_normalized_value) { // nothing to do if we delete and re-add the value. 
if is_same_value { continue; @@ -222,7 +235,7 @@ fn extract_facet_string_docids_settings( } else { // if the facet string is different, we need to insert the deletion and addition in different obkv because the related key is different. // deletion - if deladd_reader.get(DelAdd::Deletion).is_some() { + if let Some(old_hyper_normalized_value) = old_hyper_normalized_value { // insert old value let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?; buffer.clear(); diff --git a/crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 047669521..88c02fe70 100644 --- a/crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -80,7 +80,7 @@ pub fn extract_fid_docid_facet_values( let new_faceted_fids: BTreeSet<_> = settings_diff.new.faceted_fields_ids.iter().copied().collect(); - if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids { + if !settings_diff.settings_update_only || settings_diff.reindex_facets() { let mut cursor = obkv_documents.into_cursor()?; while let Some((docid_bytes, value)) = cursor.move_on_next()? { let obkv = obkv::KvReader::from_slice(value); @@ -112,8 +112,10 @@ pub fn extract_fid_docid_facet_values( (field_id, None, add_value) } EitherOrBoth::Both(&field_id, _) => { - // during settings update, recompute the changing settings only. - if settings_diff.settings_update_only { + // during settings update, recompute the changing settings only unless a global change is detected. + if settings_diff.settings_update_only + && !settings_diff.global_facet_settings_changed() + { continue; } diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index baecbdcf0..186cc501d 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -29,6 +29,7 @@ pub use self::transform::{Transform, TransformOutput}; use super::new::StdResult; use crate::documents::{obkv_to_object, DocumentsBatchReader}; use crate::error::{Error, InternalError}; +use crate::index::{PrefixSearch, PrefixSettings}; use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder; pub use crate::update::index_documents::helpers::CursorClonableMmap; use crate::update::{ @@ -82,8 +83,6 @@ pub struct IndexDocuments<'t, 'i, 'a, FP, FA> { #[derive(Default, Debug, Clone)] pub struct IndexDocumentsConfig { - pub words_prefix_threshold: Option, - pub max_prefix_length: Option, pub words_positions_level_group_size: Option, pub words_positions_min_level_size: Option, pub update_method: IndexDocumentsMethod, @@ -565,14 +564,32 @@ where self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?; // Run the words prefixes update operation. - let mut builder = WordsPrefixesFst::new(self.wtxn, self.index); - if let Some(value) = self.config.words_prefix_threshold { - builder.threshold(value); + let PrefixSettings { prefix_count_threshold, max_prefix_length, compute_prefixes } = + self.index.prefix_settings(self.wtxn)?; + + // If the prefix search is enabled at indexing time, we compute the prefixes. 
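+        // Note that the threshold and maximum prefix length are now read from the index's PrefixSettings
+        // rather than from IndexDocumentsConfig, whose words_prefix_threshold/max_prefix_length fields were removed above.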
+ if compute_prefixes == PrefixSearch::IndexingTime { + let mut builder = WordsPrefixesFst::new(self.wtxn, self.index); + builder.threshold(prefix_count_threshold); + builder.max_prefix_length(max_prefix_length); + builder.execute()?; + } else { + // If the prefix search is disabled at indexing time, we delete the previous words prefixes fst. + // And all the associated docids databases. + self.index.delete_words_prefixes_fst(self.wtxn)?; + self.index.word_prefix_docids.clear(self.wtxn)?; + self.index.exact_word_prefix_docids.clear(self.wtxn)?; + self.index.word_prefix_position_docids.clear(self.wtxn)?; + self.index.word_prefix_fid_docids.clear(self.wtxn)?; + + databases_seen += 3; + (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase { + databases_seen, + total_databases: TOTAL_POSTING_DATABASE_COUNT, + }); + + return Ok(()); } - if let Some(value) = self.config.max_prefix_length { - builder.max_prefix_length(value); - } - builder.execute()?; if (self.should_abort)() { return Err(Error::InternalError(InternalError::AbortedIndexation)); diff --git a/crates/milli/src/update/index_documents/transform.rs b/crates/milli/src/update/index_documents/transform.rs index 38bf90435..7477b5667 100644 --- a/crates/milli/src/update/index_documents/transform.rs +++ b/crates/milli/src/update/index_documents/transform.rs @@ -667,14 +667,23 @@ impl<'a, 'i> Transform<'a, 'i> { let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) }; // If only a faceted field has been added, keep only this field. - let must_reindex_facets = settings_diff.reindex_facets(); - let necessary_faceted_field = |id: FieldId| -> bool { - let field_name = settings_diff.new.fields_ids_map.name(id).unwrap(); - must_reindex_facets - && modified_faceted_fields - .iter() - .any(|long| is_faceted_by(long, field_name) || is_faceted_by(field_name, long)) - }; + let global_facet_settings_changed = settings_diff.global_facet_settings_changed(); + let facet_fids_changed = settings_diff.facet_fids_changed(); + let necessary_faceted_field = + |id: FieldId| -> bool { + let field_name = settings_diff.new.fields_ids_map.name(id).unwrap(); + if global_facet_settings_changed { + settings_diff.new.user_defined_faceted_fields.iter().any(|long| { + is_faceted_by(long, field_name) || is_faceted_by(field_name, long) + }) + } else if facet_fids_changed { + modified_faceted_fields.iter().any(|long| { + is_faceted_by(long, field_name) || is_faceted_by(field_name, long) + }) + } else { + false + } + }; // Alway provide all fields when vectors are involved because // we need the fields for the prompt/templating. diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 0f533f5aa..f1f5d96d0 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -445,7 +445,10 @@ where (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets)); - compute_facet_search_database(index, wtxn, global_fields_ids_map)?; + if index.facet_search(wtxn)? 
{ + compute_facet_search_database(index, wtxn, global_fields_ids_map)?; + } + compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords)); diff --git a/crates/milli/src/update/new/word_fst_builder.rs b/crates/milli/src/update/new/word_fst_builder.rs index 2b1c4604b..6bc72d91d 100644 --- a/crates/milli/src/update/new/word_fst_builder.rs +++ b/crates/milli/src/update/new/word_fst_builder.rs @@ -80,12 +80,12 @@ pub struct PrefixDelta { } struct PrefixFstBuilder { - prefix_count_threshold: u64, + prefix_count_threshold: usize, max_prefix_length: usize, /// TODO: Replace the full memory allocation prefix_fst_builders: Vec>>, current_prefix: Vec, - current_prefix_count: Vec, + current_prefix_count: Vec, modified_prefixes: HashSet, current_prefix_is_modified: Vec, } @@ -95,7 +95,7 @@ impl PrefixFstBuilder { let PrefixSettings { prefix_count_threshold, max_prefix_length, compute_prefixes } = prefix_settings; - if !compute_prefixes { + if compute_prefixes != crate::index::PrefixSearch::IndexingTime { return None; } diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index ccfdb1711..3d2702479 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -17,7 +17,8 @@ use super::IndexerConfig; use crate::criterion::Criterion; use crate::error::UserError; use crate::index::{ - IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, + IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO, + DEFAULT_MIN_WORD_LEN_TWO_TYPOS, }; use crate::order_by_map::OrderByMap; use crate::prompt::default_max_bytes; @@ -177,6 +178,8 @@ pub struct Settings<'a, 't, 'i> { embedder_settings: Setting>>, search_cutoff: Setting, localized_attributes_rules: Setting>, + prefix_search: Setting, + facet_search: Setting, } impl<'a, 't, 'i> Settings<'a, 't, 'i> { @@ -212,6 +215,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { embedder_settings: Setting::NotSet, search_cutoff: Setting::NotSet, localized_attributes_rules: Setting::NotSet, + prefix_search: Setting::NotSet, + facet_search: Setting::NotSet, indexer_config, } } @@ -418,6 +423,22 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.localized_attributes_rules = Setting::Reset; } + pub fn set_prefix_search(&mut self, value: PrefixSearch) { + self.prefix_search = Setting::Set(value); + } + + pub fn reset_prefix_search(&mut self) { + self.prefix_search = Setting::Reset; + } + + pub fn set_facet_search(&mut self, value: bool) { + self.facet_search = Setting::Set(value); + } + + pub fn reset_facet_search(&mut self) { + self.facet_search = Setting::Reset; + } + #[tracing::instrument( level = "trace" skip(self, progress_callback, should_abort, settings_diff), @@ -944,7 +965,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { false } else { self.index.put_proximity_precision(self.wtxn, new)?; - true + old.is_some() || new != ProximityPrecision::default() } } Setting::Reset => self.index.delete_proximity_precision(self.wtxn)?, @@ -954,6 +975,42 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Ok(changed) } + fn update_prefix_search(&mut self) -> Result { + let changed = match self.prefix_search { + Setting::Set(new) => { + let old = self.index.prefix_search(self.wtxn)?; + if old == Some(new) { + false + } else { + self.index.put_prefix_search(self.wtxn, new)?; + old.is_some() || new != PrefixSearch::default() + } + } + Setting::Reset => self.index.delete_prefix_search(self.wtxn)?, + Setting::NotSet => false, + 
}; + + Ok(changed) + } + + fn update_facet_search(&mut self) -> Result { + let changed = match self.facet_search { + Setting::Set(new) => { + let old = self.index.facet_search(self.wtxn)?; + if old == new { + false + } else { + self.index.put_facet_search(self.wtxn, new)?; + true + } + } + Setting::Reset => self.index.delete_facet_search(self.wtxn)?, + Setting::NotSet => false, + }; + + Ok(changed) + } + fn update_embedding_configs(&mut self) -> Result> { match std::mem::take(&mut self.embedder_settings) { Setting::Set(configs) => self.update_embedding_configs_set(configs), @@ -1203,6 +1260,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.update_searchable()?; self.update_exact_attributes()?; self.update_proximity_precision()?; + self.update_prefix_search()?; + self.update_facet_search()?; self.update_localized_attributes_rules()?; let embedding_config_updates = self.update_embedding_configs()?; @@ -1282,6 +1341,7 @@ impl InnerIndexSettingsDiff { || old_settings.allowed_separators != new_settings.allowed_separators || old_settings.dictionary != new_settings.dictionary || old_settings.proximity_precision != new_settings.proximity_precision + || old_settings.prefix_search != new_settings.prefix_search || old_settings.localized_searchable_fields_ids != new_settings.localized_searchable_fields_ids }; @@ -1372,7 +1432,7 @@ impl InnerIndexSettingsDiff { } } - pub fn reindex_facets(&self) -> bool { + pub fn facet_fids_changed(&self) -> bool { let existing_fields = &self.new.existing_fields; if existing_fields.iter().any(|field| field.contains('.')) { return true; @@ -1392,7 +1452,15 @@ impl InnerIndexSettingsDiff { } (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields) - || self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids + } + + pub fn global_facet_settings_changed(&self) -> bool { + self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids + || self.old.facet_search != self.new.facet_search + } + + pub fn reindex_facets(&self) -> bool { + self.facet_fids_changed() || self.global_facet_settings_changed() } pub fn reindex_vectors(&self) -> bool { @@ -1432,6 +1500,8 @@ pub(crate) struct InnerIndexSettings { pub non_faceted_fields_ids: Vec, pub localized_searchable_fields_ids: LocalizedFieldIds, pub localized_faceted_fields_ids: LocalizedFieldIds, + pub prefix_search: PrefixSearch, + pub facet_search: bool, } impl InnerIndexSettings { @@ -1457,6 +1527,8 @@ impl InnerIndexSettings { Some(embedding_configs) => embedding_configs, None => embedders(index.embedding_configs(rtxn)?)?, }; + let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default(); + let facet_search = index.facet_search(rtxn)?; let existing_fields: HashSet<_> = index .field_distribution(rtxn)? 
.into_iter() @@ -1514,6 +1586,8 @@ impl InnerIndexSettings { non_faceted_fields_ids: vectors_fids.clone(), localized_searchable_fields_ids, localized_faceted_fields_ids, + prefix_search, + facet_search, }) } @@ -2721,6 +2795,8 @@ mod tests { embedder_settings, search_cutoff, localized_attributes_rules, + prefix_search, + facet_search, } = settings; assert!(matches!(searchable_fields, Setting::NotSet)); assert!(matches!(displayed_fields, Setting::NotSet)); @@ -2746,6 +2822,8 @@ mod tests { assert!(matches!(embedder_settings, Setting::NotSet)); assert!(matches!(search_cutoff, Setting::NotSet)); assert!(matches!(localized_attributes_rules, Setting::NotSet)); + assert!(matches!(prefix_search, Setting::NotSet)); + assert!(matches!(facet_search, Setting::NotSet)); }) .unwrap(); } diff --git a/crates/milli/src/update/words_prefixes_fst.rs b/crates/milli/src/update/words_prefixes_fst.rs index d47d6d14c..d18bfa74c 100644 --- a/crates/milli/src/update/words_prefixes_fst.rs +++ b/crates/milli/src/update/words_prefixes_fst.rs @@ -9,7 +9,7 @@ use crate::{Index, Result, SmallString32}; pub struct WordsPrefixesFst<'t, 'i> { wtxn: &'t mut RwTxn<'i>, index: &'i Index, - threshold: u32, + threshold: usize, max_prefix_length: usize, } @@ -24,8 +24,8 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> { /// /// Default value is 100. This value must be higher than 50 and will be clamped /// to this bound otherwise. - pub fn threshold(&mut self, value: u32) -> &mut Self { - self.threshold = value.max(50); + pub fn threshold(&mut self, value: usize) -> &mut Self { + self.threshold = value; self } @@ -34,7 +34,7 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> { /// Default value is `4` bytes. This value must be between 1 and 25 will be clamped /// to these bounds, otherwise. pub fn max_prefix_length(&mut self, value: usize) -> &mut Self { - self.max_prefix_length = value.clamp(1, 25); + self.max_prefix_length = value; self } From aa460819a75f3184cb363fb24b915ff946b4171e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 25 Nov 2024 16:09:15 +0100 Subject: [PATCH 013/158] Add more precise spans --- crates/milli/src/update/new/indexer/mod.rs | 373 ++++++++++++--------- 1 file changed, 207 insertions(+), 166 deletions(-) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 0f533f5aa..e285ca9cb 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -109,55 +109,71 @@ where let rtxn = index.read_txn()?; + // document but we need to create a function that collects and compresses documents. let document_sender = extractor_sender.documents(); let document_extractor = DocumentsExtractor::new(&document_sender, embedders); let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); - - extract(document_changes, - &document_extractor, - indexing_context, - &mut extractor_allocs, - &datastore, - Step::ExtractingDocuments, - )?; - - for document_extractor_data in datastore { - let document_extractor_data = document_extractor_data.0.into_inner(); - for (field, delta) in document_extractor_data.field_distribution_delta { - let current = field_distribution.entry(field).or_default(); - // adding the delta should never cause a negative result, as we are removing fields that previously existed. 
- *current = current.saturating_add_signed(delta); - } - document_extractor_data.docids_delta.apply_to(document_ids); + { + let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents"); + let _entered = span.enter(); + extract(document_changes, + &document_extractor, + indexing_context, + &mut extractor_allocs, + &datastore, + Step::ExtractingDocuments, + )?; } + { + let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents"); + let _entered = span.enter(); + for document_extractor_data in datastore { + let document_extractor_data = document_extractor_data.0.into_inner(); + for (field, delta) in document_extractor_data.field_distribution_delta { + let current = field_distribution.entry(field).or_default(); + // adding the delta should never cause a negative result, as we are removing fields that previously existed. + *current = current.saturating_add_signed(delta); + } + document_extractor_data.docids_delta.apply_to(document_ids); + } - field_distribution.retain(|_, v| *v != 0); + field_distribution.retain(|_, v| *v != 0); + } let facet_field_ids_delta; { - let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted"); - let _entered = span.enter(); + let caches = { + let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted"); + let _entered = span.enter(); - facet_field_ids_delta = merge_and_send_facet_docids( FacetedDocidsExtractor::run_extraction( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - &extractor_sender.field_id_docid_facet_sender(), - Step::ExtractingFacets - )?, - FacetDatabases::new(index), - index, - extractor_sender.facet_docids(), - )?; + grenad_parameters, + document_changes, + indexing_context, + &mut extractor_allocs, + &extractor_sender.field_id_docid_facet_sender(), + Step::ExtractingFacets + )? + }; + + { + let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted"); + let _entered = span.enter(); + + facet_field_ids_delta = merge_and_send_facet_docids( + caches, + FacetDatabases::new(index), + index, + extractor_sender.facet_docids(), + )?; + } } { - let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); - let _entered = span.enter(); + + let WordDocidsCaches { @@ -166,15 +182,19 @@ where exact_word_docids, word_position_docids, fid_word_count_docids, - } = WordDocidsExtractors::run_extraction( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - Step::ExtractingWords - )?; + } = { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); + let _entered = span.enter(); + + WordDocidsExtractors::run_extraction( + grenad_parameters, + document_changes, + indexing_context, + &mut extractor_allocs, + Step::ExtractingWords + )? 
+ }; - // TODO Word Docids Merger { let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); let _entered = span.enter(); @@ -187,7 +207,6 @@ where )?; } - // Word Fid Docids Merging { let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); let _entered = span.enter(); @@ -200,7 +219,6 @@ where )?; } - // Exact Word Docids Merging { let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); let _entered = span.enter(); @@ -213,7 +231,6 @@ where )?; } - // Word Position Docids Merging { let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); let _entered = span.enter(); @@ -226,7 +243,6 @@ where )?; } - // Fid Word Count Docids Merging { let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); let _entered = span.enter(); @@ -244,30 +260,34 @@ where // this works only if the settings didn't change during this transaction. let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default(); if proximity_precision == ProximityPrecision::ByWord { - let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); - let _entered = span.enter(); + let caches = { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); + let _entered = span.enter(); + ::run_extraction( + grenad_parameters, + document_changes, + indexing_context, + &mut extractor_allocs, + Step::ExtractingWordProximity, + )? + }; - let caches = ::run_extraction( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - Step::ExtractingWordProximity, - )?; + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids"); + let _entered = span.enter(); - merge_and_send_docids( - caches, - index.word_pair_proximity_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; + merge_and_send_docids( + caches, + index.word_pair_proximity_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } } 'vectors: { - let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); - let _entered = span.enter(); let mut index_embeddings = index.embedding_configs(&rtxn)?; if index_embeddings.is_empty() { @@ -277,13 +297,22 @@ where let embedding_sender = extractor_sender.embeddings(); let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads()); let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); - extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?; + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); + let _entered = span.enter(); - for config in &mut index_embeddings { - 'data: for data in datastore.iter_mut() { - let data = &mut data.get_mut().0; - let Some(deladd) = data.remove(&config.name) else { continue 'data; }; - deladd.apply_to(&mut config.user_provided); + extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?; + } + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors"); + let _entered = span.enter(); + + for config in &mut index_embeddings { + 'data: for data in datastore.iter_mut() { + let data = &mut 
data.get_mut().0; + let Some(deladd) = data.remove(&config.name) else { continue 'data; }; + deladd.apply_to(&mut config.user_provided); + } } } @@ -291,21 +320,24 @@ where } 'geo: { - let span = tracing::trace_span!(target: "indexing::documents::extract", "geo"); - let _entered = span.enter(); - let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else { break 'geo; }; let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); - extract( - document_changes, - &extractor, - indexing_context, - &mut extractor_allocs, - &datastore, - Step::WritingGeoPoints - )?; + + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "geo"); + let _entered = span.enter(); + + extract( + document_changes, + &extractor, + indexing_context, + &mut extractor_allocs, + &datastore, + Step::WritingGeoPoints + )?; + } merge_and_send_rtree( datastore, @@ -316,11 +348,7 @@ where )?; } - { - let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH"); - let _entered = span.enter(); - (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); - } + (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); Result::Ok(facet_field_ids_delta) })?; @@ -352,90 +380,103 @@ where .collect(); let mut arroy_writers = arroy_writers?; - for operation in writer_receiver { - match operation { - WriterOperation::DbOperation(db_operation) => { - let database = db_operation.database(index); - let database_name = db_operation.database_name(); - match db_operation.entry() { - EntryOperation::Delete(e) => match database.delete(wtxn, e.entry()) { - Ok(false) => unreachable!("We tried to delete an unknown key"), - Ok(_) => (), - Err(error) => { - return Err(Error::InternalError(InternalError::StoreDeletion { - database_name, - key: e.entry().to_owned(), - error, - })); - } - }, - EntryOperation::Write(e) => { - if let Err(error) = database.put(wtxn, e.key(), e.value()) { - return Err(Error::InternalError(InternalError::StorePut { - database_name, - key: e.key().to_owned(), - value_length: e.value().len(), - error, - })); + { + let span = tracing::trace_span!(target: "indexing::write_db", "all"); + let _entered = span.enter(); + + for operation in writer_receiver { + match operation { + WriterOperation::DbOperation(db_operation) => { + let database = db_operation.database(index); + let database_name = db_operation.database_name(); + match db_operation.entry() { + EntryOperation::Delete(e) => match database.delete(wtxn, e.entry()) { + Ok(false) => unreachable!("We tried to delete an unknown key"), + Ok(_) => (), + Err(error) => { + return Err(Error::InternalError( + InternalError::StoreDeletion { + database_name, + key: e.entry().to_owned(), + error, + }, + )); + } + }, + EntryOperation::Write(e) => { + if let Err(error) = database.put(wtxn, e.key(), e.value()) { + return Err(Error::InternalError(InternalError::StorePut { + database_name, + key: e.key().to_owned(), + value_length: e.value().len(), + error, + })); + } } } } + WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation { + ArroyOperation::DeleteVectors { docid } => { + for ( + _embedder_index, + (_embedder_name, _embedder, writer, dimensions), + ) in &mut arroy_writers + { + let dimensions = *dimensions; + writer.del_items(wtxn, dimensions, docid)?; + } + } + ArroyOperation::SetVectors { + docid, + embedder_id, + embeddings: raw_embeddings, + } => { + let (_, _, writer, dimensions) = arroy_writers + .get(&embedder_id) + .expect("requested a 
missing embedder"); + + let mut embeddings = Embeddings::new(*dimensions); + for embedding in raw_embeddings { + embeddings.append(embedding).unwrap(); + } + + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_items(wtxn, docid, &embeddings)?; + } + ArroyOperation::SetVector { docid, embedder_id, embedding } => { + let (_, _, writer, dimensions) = arroy_writers + .get(&embedder_id) + .expect("requested a missing embedder"); + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_item(wtxn, docid, &embedding)?; + } + ArroyOperation::Finish { configs } => { + let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build"); + let _entered = span.enter(); + + (indexing_context.send_progress)(Progress::from_step( + Step::WritingEmbeddingsToDatabase, + )); + + for ( + _embedder_index, + (_embedder_name, _embedder, writer, dimensions), + ) in &mut arroy_writers + { + let dimensions = *dimensions; + writer.build_and_quantize( + wtxn, + &mut rng, + dimensions, + false, + &indexing_context.must_stop_processing, + )?; + } + + index.put_embedding_configs(wtxn, configs)?; + } + }, } - WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation { - ArroyOperation::DeleteVectors { docid } => { - for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in - &mut arroy_writers - { - let dimensions = *dimensions; - writer.del_items(wtxn, dimensions, docid)?; - } - } - ArroyOperation::SetVectors { - docid, - embedder_id, - embeddings: raw_embeddings, - } => { - let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - // TODO: switch to Embeddings - let mut embeddings = Embeddings::new(*dimensions); - for embedding in raw_embeddings { - embeddings.append(embedding).unwrap(); - } - - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_items(wtxn, docid, &embeddings)?; - } - ArroyOperation::SetVector { docid, embedder_id, embedding } => { - let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_item(wtxn, docid, &embedding)?; - } - ArroyOperation::Finish { configs } => { - let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build"); - let _entered = span.enter(); - - (indexing_context.send_progress)(Progress::from_step( - Step::WritingEmbeddingsToDatabase, - )); - - for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in - &mut arroy_writers - { - let dimensions = *dimensions; - writer.build_and_quantize( - wtxn, - &mut rng, - dimensions, - false, - &indexing_context.must_stop_processing, - )?; - } - - index.put_embedding_configs(wtxn, configs)?; - } - }, } } From fa15be5bc46e0f2543860fcd704f13649d522c5b Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 25 Nov 2024 16:28:57 +0100 Subject: [PATCH 014/158] Add span around commit --- crates/index-scheduler/src/batch.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index 630471790..04cdb912f 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -1024,7 +1024,13 @@ impl IndexScheduler { let mut index_wtxn = index.write_txn()?; let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?; - index_wtxn.commit()?; + + { + let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); + let _entered = span.enter(); + + 
index_wtxn.commit()?; + } // if the update processed successfully, we're going to store the new // stats of the index. Since the tasks have already been processed and From d7bcfb2d197e2b225ae8716308323acc1d3ef176 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 26 Nov 2024 14:04:16 +0100 Subject: [PATCH 015/158] fix clippy --- crates/milli/src/update/facet/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/update/facet/mod.rs b/crates/milli/src/update/facet/mod.rs index f4835e6a8..3eaf2f221 100644 --- a/crates/milli/src/update/facet/mod.rs +++ b/crates/milli/src/update/facet/mod.rs @@ -178,7 +178,7 @@ impl<'i> FacetsUpdate<'i> { // We clear the facet search databases. self.index.facet_id_string_fst.clear(wtxn)?; self.index.facet_id_normalized_string_strings.clear(wtxn)?; - return Ok(()); + Ok(()) } Some(data) => index_facet_search(wtxn, data, self.index), None => Ok(()), From 9008ecda3d0af01661024e79c649cb33fe270641 Mon Sep 17 00:00:00 2001 From: Many the fish Date: Tue, 26 Nov 2024 14:44:24 +0100 Subject: [PATCH 016/158] Update crates/meilisearch-types/src/settings.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- crates/meilisearch-types/src/settings.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index 48481e364..b12dfc9a2 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -272,8 +272,8 @@ impl Settings { embedders: Setting::Reset, search_cutoff_ms: Setting::Reset, localized_attributes: Setting::Reset, - facet_search: Setting::NotSet, - prefix_search: Setting::NotSet, + facet_search: Setting::Reset, + prefix_search: Setting::Reset, _kind: PhantomData, } } From f014e786840097e6a02415ef064ccee8c6986e41 Mon Sep 17 00:00:00 2001 From: Many the fish Date: Tue, 26 Nov 2024 14:46:01 +0100 Subject: [PATCH 017/158] Update crates/milli/src/index.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- crates/milli/src/index.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 5bd24b9e4..b2f3cdbd1 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1693,9 +1693,6 @@ impl Index { Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, - #[cfg(not(test))] - prefix_count_threshold: 100, - #[cfg(test)] prefix_count_threshold: 100, }) } From 8f57b4fdf48c605dced71a4e8a89665fe32eabe9 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 26 Nov 2024 14:10:52 +0100 Subject: [PATCH 018/158] Span to measure the part of db writes that is after the merge/extraction --- crates/milli/src/update/new/indexer/mod.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index e285ca9cb..e7c5e30a6 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -1,4 +1,5 @@ use std::cmp::Ordering; +use std::sync::atomic::AtomicBool; use std::sync::{OnceLock, RwLock}; use std::thread::{self, Builder}; @@ -76,6 +77,7 @@ where SP: Fn(Progress) + Sync, { let (extractor_sender, writer_receiver) = extractor_writer_channel(10_000); + let finished_extraction = AtomicBool::new(false); let metadata_builder = MetadataBuilder::from_index(index, 
wtxn)?; let new_fields_ids_map = FieldIdMapWithMetadata::new(new_fields_ids_map, metadata_builder); @@ -100,6 +102,7 @@ where thread::scope(|s| -> Result<()> { let indexer_span = tracing::Span::current(); let embedders = &embedders; + let finished_extraction = &finished_extraction; // prevent moving the field_distribution and document_ids in the inner closure... let field_distribution = &mut field_distribution; let document_ids = &mut document_ids; @@ -350,6 +353,8 @@ where (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); + finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); + Result::Ok(facet_field_ids_delta) })?; @@ -384,7 +389,15 @@ where let span = tracing::trace_span!(target: "indexing::write_db", "all"); let _entered = span.enter(); + let span = tracing::trace_span!(target: "indexing::write_db", "post_merge"); + let mut _entered_post_merge = None; + for operation in writer_receiver { + if _entered_post_merge.is_none() + && finished_extraction.load(std::sync::atomic::Ordering::Relaxed) + { + _entered_post_merge = Some(span.enter()); + } match operation { WriterOperation::DbOperation(db_operation) => { let database = db_operation.database(index); From 2e896f30a50519d3c5435779dd2b1c41b66f158b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 26 Nov 2024 15:53:54 +0100 Subject: [PATCH 019/158] Fix PR comments --- .../meilisearch/tests/search/facet_search.rs | 60 +++++++++---------- .../tests/settings/prefix_search_settings.rs | 30 +++++----- crates/milli/src/index.rs | 6 +- crates/milli/src/update/facet/mod.rs | 15 ++--- 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 52b8171c4..8fbeae293 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -41,8 +41,8 @@ async fn simple_facet_search() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["genres"])).await; - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -65,8 +65,8 @@ async fn advanced_facet_search() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["genres"])).await; index.update_settings_typo_tolerance(json!({ "enabled": false })).await; - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await; @@ -89,8 +89,8 @@ async fn more_advanced_facet_search() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["genres"])).await; index.update_settings_typo_tolerance(json!({ "disableOnWords": ["adventre"] })).await; - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "adventre"})).await; @@ -113,8 +113,8 @@ async fn simple_facet_search_with_max_values() { let documents = DOCUMENTS.clone(); 
index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await; index.update_settings_filterable_attributes(json!(["genres"])).await; - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -135,8 +135,8 @@ async fn simple_facet_search_by_count_with_max_values() { ) .await; index.update_settings_filterable_attributes(json!(["genres"])).await; - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -151,8 +151,8 @@ async fn non_filterable_facet_search_error() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -170,8 +170,8 @@ async fn facet_search_dont_support_words() { let documents = DOCUMENTS.clone(); index.update_settings_filterable_attributes(json!(["genres"])).await; - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "words"})).await; @@ -188,8 +188,8 @@ async fn simple_facet_search_with_sort_by_count() { let documents = DOCUMENTS.clone(); index.update_settings_faceting(json!({ "sortFacetValuesBy": { "*": "count" } })).await; index.update_settings_filterable_attributes(json!(["genres"])).await; - index.add_documents(documents, None).await; - index.wait_task(2).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -207,8 +207,8 @@ async fn add_documents_and_deactivate_facet_search() { let index = server.index("test"); let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(0).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index .update_settings(json!({ "facetSearch": false, @@ -216,7 +216,7 @@ async fn add_documents_and_deactivate_facet_search() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(1).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -237,10 +237,10 @@ async fn deactivate_facet_search_and_add_documents() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(0).await; + index.wait_task(response.uid()).await; let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -261,10 
+261,10 @@ async fn deactivate_facet_search_add_documents_and_activate_facet_search() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(0).await; + index.wait_task(response.uid()).await; let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index .update_settings(json!({ @@ -272,7 +272,7 @@ async fn deactivate_facet_search_add_documents_and_activate_facet_search() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(2).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; @@ -293,10 +293,10 @@ async fn deactivate_facet_search_add_documents_and_reset_facet_search() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(0).await; + index.wait_task(response.uid()).await; let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; let (response, code) = index .update_settings(json!({ @@ -304,7 +304,7 @@ async fn deactivate_facet_search_add_documents_and_reset_facet_search() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(2).await; + index.wait_task(response.uid()).await; let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; diff --git a/crates/meilisearch/tests/settings/prefix_search_settings.rs b/crates/meilisearch/tests/settings/prefix_search_settings.rs index 34a891f97..5da758a7d 100644 --- a/crates/meilisearch/tests/settings/prefix_search_settings.rs +++ b/crates/meilisearch/tests/settings/prefix_search_settings.rs @@ -29,8 +29,8 @@ async fn add_docs_and_disable() { let server = Server::new().await; let index = server.index("test"); - index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(0).await; + let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(response.uid()).await; let (response, code) = index .update_settings(json!({ @@ -39,7 +39,7 @@ async fn add_docs_and_disable() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(1).await; + index.wait_task(response.uid()).await; // only 1 document should match index @@ -96,10 +96,10 @@ async fn disable_and_add_docs() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(0).await; + index.wait_task(response.uid()).await; - index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(1).await; + let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(response.uid()).await; // only 1 document should match index @@ -155,10 +155,10 @@ async fn disable_add_docs_and_enable() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(0).await; + index.wait_task(response.uid()).await; - index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(1).await; + let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(response.uid()).await; let (response, code) = index .update_settings(json!({ @@ -263,10 +263,10 @@ async fn disable_add_docs_and_reset() { })) .await; assert_eq!("202", code.as_str(), "{:?}", 
response); - index.wait_task(0).await; + index.wait_task(response.uid()).await; - index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(1).await; + let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(response.uid()).await; let (response, code) = index .update_settings(json!({ @@ -370,10 +370,10 @@ async fn default_behavior() { })) .await; assert_eq!("202", code.as_str(), "{:?}", response); - index.wait_task(0).await; + index.wait_task(response.uid()).await; - index.add_documents(DOCUMENTS.clone(), None).await; - index.wait_task(1).await; + let (response, _code) = index.add_documents(DOCUMENTS.clone(), None).await; + index.wait_task(response.uid()).await; // all documents should match index diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index b2f3cdbd1..fe83877a7 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1690,11 +1690,7 @@ impl Index { pub fn prefix_settings(&self, rtxn: &RoTxn<'_>) -> Result { let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default(); - Ok(PrefixSettings { - compute_prefixes, - max_prefix_length: 4, - prefix_count_threshold: 100, - }) + Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 }) } } diff --git a/crates/milli/src/update/facet/mod.rs b/crates/milli/src/update/facet/mod.rs index 3eaf2f221..911296577 100644 --- a/crates/milli/src/update/facet/mod.rs +++ b/crates/milli/src/update/facet/mod.rs @@ -172,14 +172,15 @@ impl<'i> FacetsUpdate<'i> { incremental_update.execute(wtxn)?; } + if !self.index.facet_search(wtxn)? { + // If facet search is disabled, we don't need to compute facet search databases. + // We clear the facet search databases. + self.index.facet_id_string_fst.clear(wtxn)?; + self.index.facet_id_normalized_string_strings.clear(wtxn)?; + return Ok(()); + } + match self.normalized_delta_data { - _ if !self.index.facet_search(wtxn)? => { - // If facet search is disabled, we don't need to compute facet search databases. - // We clear the facet search databases. 
- self.index.facet_id_string_fst.clear(wtxn)?; - self.index.facet_id_normalized_string_strings.clear(wtxn)?; - Ok(()) - } Some(data) => index_facet_search(wtxn, data, self.index), None => Ok(()), } From 18a9af353c262e776104c7d0a7ac8cdba96a8850 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 27 Nov 2024 11:12:08 +0100 Subject: [PATCH 020/158] Update Charabia version to v0.9.2 --- Cargo.lock | 12 +++++++----- crates/milli/Cargo.toml | 3 +-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0f2a13125..e4789da4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -969,8 +969,9 @@ dependencies = [ [[package]] name = "charabia" -version = "0.9.1" -source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf8921fe4d53ab8f9e8f9b72ce6f91726cfc40fffab1243d27db406b5e2e9cc2" dependencies = [ "aho-corasick", "csv", @@ -2709,7 +2710,8 @@ checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" [[package]] name = "irg-kvariants" version = "0.1.1" -source = "git+https://github.com/meilisearch/charabia?branch=mutualize-char-normalizer#f8d8308cdb8db80819be7eeed5652cc4a995cc71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2af7c331f2536964a32b78a7d2e0963d78b42f4a76323b16cc7d94b1ddce26" dependencies = [ "csv", "once_cell", @@ -6017,9 +6019,9 @@ dependencies = [ [[package]] name = "wana_kana" -version = "3.0.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "477976a5c56fb7b014795df5a2ce08d2de8bcd4d5980844c5bd3978a7fd1c30b" +checksum = "a74666202acfcb4f9b995be2e3e9f7f530deb65e05a1407b8d0b30c9c451238a" dependencies = [ "fnv", "itertools 0.10.5", diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index 1a3bfbcf1..a0bd86a42 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -18,8 +18,7 @@ bincode = "1.3.3" bstr = "1.9.1" bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] } byteorder = "1.5.0" -# charabia = { version = "0.9.0", default-features = false } -charabia = { git = "https://github.com/meilisearch/charabia", branch = "mutualize-char-normalizer", default-features = false } +charabia = { version = "0.9.2", default-features = false } concat-arrays = "0.1.2" crossbeam-channel = "0.5.13" deserr = "0.6.2" From 79671c9faa2bdc2e2dcd83191a31a00e7175d2e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 26 Nov 2024 12:19:32 +0100 Subject: [PATCH 021/158] Implement a first version of the bbqueue channels --- Cargo.lock | 7 ++++ crates/milli/Cargo.toml | 2 ++ crates/milli/src/update/new/channel.rs | 46 ++++++++++++++++++++++++++ 3 files changed, 55 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index e4789da4a..e2069db87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -489,6 +489,11 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bbqueue" +version = "0.5.1" +source = "git+https://github.com/kerollmops/bbqueue#cbb87cc707b5af415ef203bdaf2443e06ba0d6d4" + [[package]] name = "benchmarks" version = "1.12.0" @@ -3611,6 +3616,7 @@ version = "1.12.0" dependencies = [ "allocator-api2", "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bbqueue", "big_s", "bimap", "bincode", @@ -3623,6 
+3629,7 @@ dependencies = [ "candle-transformers", "charabia", "concat-arrays", + "crossbeam", "crossbeam-channel", "csv", "deserr", diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index a0bd86a42..798a4ea19 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -98,6 +98,8 @@ allocator-api2 = "0.2.18" rustc-hash = "2.0.0" uell = "0.1.0" enum-iterator = "2.1.0" +bbqueue = { git = "https://github.com/kerollmops/bbqueue" } +crossbeam = "0.8.4" [dev-dependencies] mimalloc = { version = "0.1.43", default-features = false } diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 00b471b52..21cd6b87d 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -1,6 +1,7 @@ use std::marker::PhantomData; use std::sync::atomic::{AtomicUsize, Ordering}; +use crossbeam::sync::{Parker, Unparker}; use crossbeam_channel::{IntoIter, Receiver, SendError, Sender}; use heed::types::Bytes; use heed::BytesDecode; @@ -8,6 +9,7 @@ use memmap2::Mmap; use roaring::RoaringBitmap; use super::extract::FacetKind; +use super::thread_local::{FullySend, ThreadLocal}; use super::StdResult; use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY}; @@ -16,6 +18,50 @@ use crate::update::new::KvReaderFieldId; use crate::vector::Embedding; use crate::{DocumentId, Index}; +/// Creates a tuple of producer/receivers to be used by +/// the extractors and the writer loop. +/// +/// # Safety +/// +/// Panics if the number of provided bbqueue is not exactly equal +/// to the number of available threads in the rayon threadpool. +pub fn extractor_writer_bbqueue( + bbqueue: &[bbqueue::BBBuffer], +) -> (ExtractorBbqueueSender, WriterBbqueueReceiver) { + assert_eq!( + bbqueue.len(), + rayon::current_num_threads(), + "You must provide as many BBBuffer as the available number of threads to extract" + ); + + let parker = Parker::new(); + let extractors = ThreadLocal::with_capacity(bbqueue.len()); + let producers = rayon::broadcast(|bi| { + let bbqueue = &bbqueue[bi.index()]; + let (producer, consumer) = bbqueue.try_split_framed().unwrap(); + extractors.get_or(|| FullySend(producer)); + consumer + }); + + ( + ExtractorBbqueueSender { inner: extractors, unparker: parker.unparker().clone() }, + WriterBbqueueReceiver { inner: producers, parker }, + ) +} + +pub struct ExtractorBbqueueSender<'a> { + inner: ThreadLocal>>, + /// Used to wake up the receiver thread, + /// Used everytime we write something in the producer. + unparker: Unparker, +} + +pub struct WriterBbqueueReceiver<'a> { + inner: Vec>, + /// Used to park when no more work is required + parker: Parker, +} + /// The capacity of the channel is currently in number of messages. 
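For reference, a minimal sketch of the framed BBQueue flow that the sender/receiver types added above rely on, written against the published bbqueue 0.5 API (this patch pins a fork whose `BBBuffer::new` takes a runtime capacity instead of a const generic); it only illustrates the grant/commit/read/release cycle between an extractor (producer) and the writer loop (consumer), not milli's actual wiring.

```rust
// Minimal sketch of bbqueue's framed mode: the producer reserves a frame,
// fills it and commits it; the consumer reads whole frames back in order.
use bbqueue::BBBuffer;

fn main() {
    // The patch allocates one buffer per extractor thread; one small one here.
    static BB: BBBuffer<64> = BBBuffer::new();
    let (mut producer, mut consumer) = BB.try_split_framed().unwrap();

    // Extractor side: reserve a frame, write the payload, commit it.
    let mut grant = producer.grant(5).unwrap();
    grant[..5].copy_from_slice(b"hello");
    grant.commit(5);

    // Writer side: read the frame and release it once it has been handled.
    let frame = consumer.read().unwrap();
    assert_eq!(&frame[..], &b"hello"[..]);
    frame.release();
}
```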
pub fn extractor_writer_channel(cap: usize) -> (ExtractorSender, WriterReceiver) { let (sender, receiver) = crossbeam_channel::bounded(cap); From 8442db8101ccc7df7e5b5ab98f8be593d659700a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 26 Nov 2024 18:30:44 +0100 Subject: [PATCH 022/158] Implement mostly all senders --- .../cbo_roaring_bitmap_codec.rs | 19 + crates/milli/src/update/new/channel.rs | 641 ++++++++++-------- .../milli/src/update/new/extract/documents.rs | 11 +- .../src/update/new/extract/vectors/mod.rs | 6 +- crates/milli/src/update/new/indexer/mod.rs | 8 +- crates/milli/src/update/new/merger.rs | 17 +- 6 files changed, 398 insertions(+), 304 deletions(-) diff --git a/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index 257d5bd0a..cae1874dd 100644 --- a/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -41,6 +41,25 @@ impl CboRoaringBitmapCodec { } } + pub fn serialize_into_writer( + roaring: &RoaringBitmap, + mut writer: W, + ) -> io::Result<()> { + if roaring.len() <= THRESHOLD as u64 { + // If the number of items (u32s) to encode is less than or equal to the threshold + // it means that it would weigh the same or less than the RoaringBitmap + // header, so we directly encode them using ByteOrder instead. + for integer in roaring { + writer.write_u32::(integer)?; + } + } else { + // Otherwise, we use the classic RoaringBitmapCodec that writes a header. + roaring.serialize_into(writer)?; + } + + Ok(()) + } + pub fn deserialize_from(mut bytes: &[u8]) -> io::Result { if bytes.len() <= THRESHOLD * size_of::() { // If there is threshold or less than threshold integers that can fit into this array diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 21cd6b87d..cacc7b129 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -1,14 +1,19 @@ +use std::cell::RefCell; use std::marker::PhantomData; -use std::sync::atomic::{AtomicUsize, Ordering}; +use std::num::NonZeroU16; +use std::{mem, slice}; +use bbqueue::framed::{FrameGrantR, FrameProducer}; +use bytemuck::{NoUninit, CheckedBitPattern}; use crossbeam::sync::{Parker, Unparker}; -use crossbeam_channel::{IntoIter, Receiver, SendError, Sender}; +use crossbeam_channel::{IntoIter, Receiver, SendError}; use heed::types::Bytes; use heed::BytesDecode; use memmap2::Mmap; use roaring::RoaringBitmap; use super::extract::FacetKind; +use super::ref_cell_ext::RefCellExt; use super::thread_local::{FullySend, ThreadLocal}; use super::StdResult; use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; @@ -16,7 +21,7 @@ use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY}; use crate::index::{db_name, IndexEmbeddingConfig}; use crate::update::new::KvReaderFieldId; use crate::vector::Embedding; -use crate::{DocumentId, Index}; +use crate::{CboRoaringBitmapCodec, DocumentId, Index}; /// Creates a tuple of producer/receivers to be used by /// the extractors and the writer loop. @@ -26,125 +31,97 @@ use crate::{DocumentId, Index}; /// Panics if the number of provided bbqueue is not exactly equal /// to the number of available threads in the rayon threadpool. 
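As context for the `serialize_into_writer` helper introduced earlier in this patch, here is a self-contained sketch of the size-based rule it applies; the `THRESHOLD` value below is assumed for illustration only (milli defines its own constant for this codec): small bitmaps are written as raw native-endian `u32`s, larger ones fall back to the standard RoaringBitmap serialization with its header.

```rust
// Illustrative only: mirrors the branch in serialize_into_writer with an
// assumed THRESHOLD; milli's codec defines the real value.
use std::io::{self, Write};

use roaring::RoaringBitmap;

const THRESHOLD: u64 = 4; // assumed, for illustration

fn cbo_serialize<W: Write>(bitmap: &RoaringBitmap, mut writer: W) -> io::Result<()> {
    if bitmap.len() <= THRESHOLD {
        // Few integers: raw u32s weigh less than a roaring header.
        for integer in bitmap {
            writer.write_all(&integer.to_ne_bytes())?;
        }
        Ok(())
    } else {
        // Otherwise use the classic roaring serialization.
        bitmap.serialize_into(writer)
    }
}

fn main() -> io::Result<()> {
    let mut buf = Vec::new();
    cbo_serialize(&(0..3).collect(), &mut buf)?;
    assert_eq!(buf.len(), 3 * 4); // three raw u32s, no header
    Ok(())
}
```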
pub fn extractor_writer_bbqueue( - bbqueue: &[bbqueue::BBBuffer], + bbbuffers: &[bbqueue::BBBuffer], ) -> (ExtractorBbqueueSender, WriterBbqueueReceiver) { assert_eq!( - bbqueue.len(), + bbbuffers.len(), rayon::current_num_threads(), "You must provide as many BBBuffer as the available number of threads to extract" ); + let capacity = bbbuffers.first().unwrap().capacity(); let parker = Parker::new(); - let extractors = ThreadLocal::with_capacity(bbqueue.len()); + let extractors = ThreadLocal::with_capacity(bbbuffers.len()); let producers = rayon::broadcast(|bi| { - let bbqueue = &bbqueue[bi.index()]; + let bbqueue = &bbbuffers[bi.index()]; let (producer, consumer) = bbqueue.try_split_framed().unwrap(); - extractors.get_or(|| FullySend(producer)); + extractors.get_or(|| FullySend(RefCell::new(producer))); consumer }); ( - ExtractorBbqueueSender { inner: extractors, unparker: parker.unparker().clone() }, + ExtractorBbqueueSender { + inner: extractors, + capacity: capacity.checked_sub(9).unwrap(), + unparker: parker.unparker().clone(), + }, WriterBbqueueReceiver { inner: producers, parker }, ) } -pub struct ExtractorBbqueueSender<'a> { - inner: ThreadLocal>>, - /// Used to wake up the receiver thread, - /// Used everytime we write something in the producer. - unparker: Unparker, -} - pub struct WriterBbqueueReceiver<'a> { inner: Vec>, /// Used to park when no more work is required parker: Parker, } -/// The capacity of the channel is currently in number of messages. -pub fn extractor_writer_channel(cap: usize) -> (ExtractorSender, WriterReceiver) { - let (sender, receiver) = crossbeam_channel::bounded(cap); - ( - ExtractorSender { - sender, - send_count: Default::default(), - writer_contentious_count: Default::default(), - extractor_contentious_count: Default::default(), - }, - WriterReceiver(receiver), - ) -} - -pub enum KeyValueEntry { - Small { key_length: usize, data: Box<[u8]> }, - Large { key_entry: KeyEntry, data: Mmap }, -} - -impl KeyValueEntry { - pub fn from_small_key_value(key: &[u8], value: &[u8]) -> Self { - let mut data = Vec::with_capacity(key.len() + value.len()); - data.extend_from_slice(key); - data.extend_from_slice(value); - KeyValueEntry::Small { key_length: key.len(), data: data.into_boxed_slice() } - } - - fn from_large_key_value(key: &[u8], value: Mmap) -> Self { - KeyValueEntry::Large { key_entry: KeyEntry::from_key(key), data: value } - } - - pub fn key(&self) -> &[u8] { - match self { - KeyValueEntry::Small { key_length, data } => &data[..*key_length], - KeyValueEntry::Large { key_entry, data: _ } => key_entry.entry(), - } - } - - pub fn value(&self) -> &[u8] { - match self { - KeyValueEntry::Small { key_length, data } => &data[*key_length..], - KeyValueEntry::Large { key_entry: _, data } => &data[..], +impl<'a> WriterBbqueueReceiver<'a> { + pub fn read(&mut self) -> Option> { + loop { + for consumer in &mut self.inner { + // mark the frame as auto release + if let Some() = consumer.read() + } + break None; } } } -pub struct KeyEntry { - data: Box<[u8]>, +struct FrameWithHeader<'a> { + header: EntryHeader, + frame: FrameGrantR<'a>, } -impl KeyEntry { - pub fn from_key(key: &[u8]) -> Self { - KeyEntry { data: key.to_vec().into_boxed_slice() } +#[derive(Debug, Clone, Copy, CheckedBitPattern)] +#[repr(u8)] +enum EntryHeader { + /// Wether a put of the key/value pair or a delete of the given key. + DbOperation { + /// The database on which to perform the operation. + database: Database, + /// The key length in the buffer. 
+ /// + /// If None it means that the buffer is dedicated + /// to the key and it is therefore a deletion operation. + key_length: Option, + }, + ArroyDeleteVector { + docid: DocumentId, + }, + /// The embedding is the remaining space and represents a non-aligned [f32]. + ArroySetVector { + docid: DocumentId, + embedder_id: u8, + }, +} + +impl EntryHeader { + fn delete_key_size(key_length: u16) -> usize { + mem::size_of::() + key_length as usize } - pub fn entry(&self) -> &[u8] { - self.data.as_ref() + fn put_key_value_size(key_length: u16, value_length: usize) -> usize { + mem::size_of::() + key_length as usize + value_length + } + + fn bytes_of(&self) -> &[u8] { + /// TODO do the variant matching ourselves + todo!() } } -pub enum EntryOperation { - Delete(KeyEntry), - Write(KeyValueEntry), -} - -pub enum WriterOperation { - DbOperation(DbOperation), - ArroyOperation(ArroyOperation), -} - -pub enum ArroyOperation { - DeleteVectors { docid: DocumentId }, - SetVectors { docid: DocumentId, embedder_id: u8, embeddings: Vec }, - SetVector { docid: DocumentId, embedder_id: u8, embedding: Embedding }, - Finish { configs: Vec }, -} - -pub struct DbOperation { - database: Database, - entry: EntryOperation, -} - -#[derive(Debug)] +#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] +#[repr(u32)] pub enum Database { Main, Documents, @@ -220,82 +197,46 @@ impl From for Database { } } -impl DbOperation { - pub fn database(&self, index: &Index) -> heed::Database { - self.database.database(index) - } - - pub fn database_name(&self) -> &'static str { - self.database.database_name() - } - - pub fn entry(self) -> EntryOperation { - self.entry - } +pub struct ExtractorBbqueueSender<'a> { + inner: ThreadLocal>>>, + /// The capacity of this frame producer, will never be able to store more than that. + /// + /// Note that the FrameProducer requires up to 9 bytes to encode the length, + /// the capacity has been shrinked accordingly. + /// + /// + capacity: usize, + /// Used to wake up the receiver thread, + /// Used everytime we write something in the producer. + unparker: Unparker, } -pub struct WriterReceiver(Receiver); - -impl IntoIterator for WriterReceiver { - type Item = WriterOperation; - type IntoIter = IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -pub struct ExtractorSender { - sender: Sender, - /// The number of message we sent in total in the channel. - send_count: AtomicUsize, - /// The number of times we sent something in a channel that was full. - writer_contentious_count: AtomicUsize, - /// The number of times we sent something in a channel that was empty. 
- extractor_contentious_count: AtomicUsize, -} - -impl Drop for ExtractorSender { - fn drop(&mut self) { - let send_count = *self.send_count.get_mut(); - let writer_contentious_count = *self.writer_contentious_count.get_mut(); - let extractor_contentious_count = *self.extractor_contentious_count.get_mut(); - tracing::debug!( - "Extractor channel stats: {send_count} sends, \ - {writer_contentious_count} writer contentions ({}%), \ - {extractor_contentious_count} extractor contentions ({}%)", - (writer_contentious_count as f32 / send_count as f32) * 100.0, - (extractor_contentious_count as f32 / send_count as f32) * 100.0 - ) - } -} - -impl ExtractorSender { - pub fn docids(&self) -> WordDocidsSender<'_, D> { +impl<'b> ExtractorBbqueueSender<'b> { + pub fn docids<'a, D: DatabaseType>(&'a self) -> WordDocidsSender<'a, 'b, D> { WordDocidsSender { sender: self, _marker: PhantomData } } - pub fn facet_docids(&self) -> FacetDocidsSender<'_> { + pub fn facet_docids<'a>(&'a self) -> FacetDocidsSender<'a, 'b> { FacetDocidsSender { sender: self } } - pub fn field_id_docid_facet_sender(&self) -> FieldIdDocidFacetSender<'_> { - FieldIdDocidFacetSender(self) + pub fn field_id_docid_facet_sender<'a>(&'a self) -> FieldIdDocidFacetSender<'a, 'b> { + FieldIdDocidFacetSender(&self) } - pub fn documents(&self) -> DocumentsSender<'_> { - DocumentsSender(self) + pub fn documents<'a>(&'a self) -> DocumentsSender<'a, 'b> { + DocumentsSender(&self) } - pub fn embeddings(&self) -> EmbeddingSender<'_> { - EmbeddingSender(&self.sender) + pub fn embeddings<'a>(&'a self) -> EmbeddingSender<'a, 'b> { + EmbeddingSender(&self) } - pub fn geo(&self) -> GeoSender<'_> { - GeoSender(&self.sender) + pub fn geo<'a>(&'a self) -> GeoSender<'a, 'b> { + GeoSender(&self) } - fn send_delete_vector(&self, docid: DocumentId) -> StdResult<(), SendError<()>> { + fn send_delete_vector(&self, docid: DocumentId) -> crate::Result<()> { match self .sender .send(WriterOperation::ArroyOperation(ArroyOperation::DeleteVectors { docid })) @@ -305,18 +246,69 @@ impl ExtractorSender { } } - fn send_db_operation(&self, op: DbOperation) -> StdResult<(), SendError<()>> { - if self.sender.is_full() { - self.writer_contentious_count.fetch_add(1, Ordering::SeqCst); - } - if self.sender.is_empty() { - self.extractor_contentious_count.fetch_add(1, Ordering::SeqCst); + fn write_key_value(&self, database: Database, key: &[u8], value: &[u8]) -> crate::Result<()> { + let capacity = self.capacity; + let refcell = self.inner.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + + let key_length = key.len().try_into().unwrap(); + let value_length = value.len(); + let total_length = EntryHeader::put_key_value_size(key_length, value_length); + if total_length > capacity { + unreachable!("entry larger that the bbqueue capacity"); } - self.send_count.fetch_add(1, Ordering::SeqCst); - match self.sender.send(WriterOperation::DbOperation(op)) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), + let payload_header = + EntryHeader::DbOperation { database, key_length: NonZeroU16::new(key_length) }; + + loop { + let mut grant = match producer.grant(total_length) { + Ok(grant) => grant, + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + }; + + let (header, remaining) = grant.split_at_mut(mem::size_of::()); + header.copy_from_slice(payload_header.bytes_of()); + let (key_out, value_out) = remaining.split_at_mut(key.len()); + key_out.copy_from_slice(key); + value_out.copy_from_slice(value); + + // We could 
commit only the used memory. + grant.commit(total_length); + + break Ok(()); + } + } + + fn delete_entry(&self, database: Database, key: &[u8]) -> crate::Result<()> { + let capacity = self.capacity; + let refcell = self.inner.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + + let key_length = key.len().try_into().unwrap(); + let total_length = EntryHeader::delete_key_size(key_length); + if total_length > capacity { + unreachable!("entry larger that the bbqueue capacity"); + } + + let payload_header = EntryHeader::DbOperation { database, key_length: None }; + + loop { + let mut grant = match producer.grant(total_length) { + Ok(grant) => grant, + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + }; + + let (header, remaining) = grant.split_at_mut(mem::size_of::()); + header.copy_from_slice(payload_header.bytes_of()); + remaining.copy_from_slice(key); + + // We could commit only the used memory. + grant.commit(total_length); + + break Ok(()); } } } @@ -356,159 +348,237 @@ impl DatabaseType for WordPositionDocids { const DATABASE: Database = Database::WordPositionDocids; } -pub trait DocidsSender { - fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>>; - fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>>; -} - -pub struct WordDocidsSender<'a, D> { - sender: &'a ExtractorSender, +pub struct WordDocidsSender<'a, 'b, D> { + sender: &'a ExtractorBbqueueSender<'b>, _marker: PhantomData, } -impl DocidsSender for WordDocidsSender<'_, D> { - fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value)); - match self.sender.send_db_operation(DbOperation { database: D::DATABASE, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), +impl WordDocidsSender<'_, '_, D> { + pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> { + let capacity = self.sender.capacity; + let refcell = self.sender.inner.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + + let key_length = key.len().try_into().unwrap(); + let value_length = CboRoaringBitmapCodec::serialized_size(bitmap); + + let total_length = EntryHeader::put_key_value_size(key_length, value_length); + if total_length > capacity { + unreachable!("entry larger that the bbqueue capacity"); } - } - fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Delete(KeyEntry::from_key(key)); - match self.sender.send_db_operation(DbOperation { database: D::DATABASE, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } - } -} - -pub struct FacetDocidsSender<'a> { - sender: &'a ExtractorSender, -} - -impl DocidsSender for FacetDocidsSender<'_> { - fn write(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> { - let (facet_kind, key) = FacetKind::extract_from_key(key); - let database = Database::from(facet_kind); - let entry = match facet_kind { - // skip level group size - FacetKind::String | FacetKind::Number => { - // add facet group size - let value = [&[1], value].concat(); - EntryOperation::Write(KeyValueEntry::from_small_key_value(key, &value)) - } - _ => EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value)), + let payload_header = EntryHeader::DbOperation { + database: D::DATABASE, + key_length: NonZeroU16::new(key_length), }; - match self.sender.send_db_operation(DbOperation { database, entry }) { - Ok(()) => Ok(()), - 
Err(SendError(_)) => Err(SendError(())), + + loop { + let mut grant = match producer.grant(total_length) { + Ok(grant) => grant, + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + }; + + let (header, remaining) = grant.split_at_mut(mem::size_of::()); + header.copy_from_slice(payload_header.bytes_of()); + let (key_out, value_out) = remaining.split_at_mut(key.len()); + key_out.copy_from_slice(key); + CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?; + + // We could commit only the used memory. + grant.commit(total_length); + + break Ok(()); } } - fn delete(&self, key: &[u8]) -> StdResult<(), SendError<()>> { + pub fn delete(&self, key: &[u8]) -> crate::Result<()> { + let capacity = self.sender.capacity; + let refcell = self.sender.inner.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + + let key_length = key.len().try_into().unwrap(); + let total_length = EntryHeader::delete_key_size(key_length); + if total_length > capacity { + unreachable!("entry larger that the bbqueue capacity"); + } + + let payload_header = EntryHeader::DbOperation { database: D::DATABASE, key_length: None }; + + loop { + let mut grant = match producer.grant(total_length) { + Ok(grant) => grant, + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + }; + + let (header, remaining) = grant.split_at_mut(mem::size_of::()); + header.copy_from_slice(payload_header.bytes_of()); + remaining.copy_from_slice(key); + + // We could commit only the used memory. + grant.commit(total_length); + + break Ok(()); + } + } +} + +pub struct FacetDocidsSender<'a, 'b> { + sender: &'a ExtractorBbqueueSender<'b>, +} + +impl FacetDocidsSender<'_, '_> { + pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> { + let capacity = self.sender.capacity; + let refcell = self.sender.inner.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + let (facet_kind, key) = FacetKind::extract_from_key(key); - let database = Database::from(facet_kind); - let entry = EntryOperation::Delete(KeyEntry::from_key(key)); - match self.sender.send_db_operation(DbOperation { database, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), + let key_length = key.len().try_into().unwrap(); + + let value_length = CboRoaringBitmapCodec::serialized_size(bitmap); + let value_length = match facet_kind { + // We must take the facet group size into account + // when we serialize strings and numbers. + FacetKind::Number | FacetKind::String => value_length + 1, + FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_length, + }; + + let total_length = EntryHeader::put_key_value_size(key_length, value_length); + if total_length > capacity { + unreachable!("entry larger that the bbqueue capacity"); + } + + let payload_header = EntryHeader::DbOperation { + database: Database::from(facet_kind), + key_length: NonZeroU16::new(key_length), + }; + + loop { + let mut grant = match producer.grant(total_length) { + Ok(grant) => grant, + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + }; + + let (header, remaining) = grant.split_at_mut(mem::size_of::()); + header.copy_from_slice(payload_header.bytes_of()); + let (key_out, value_out) = remaining.split_at_mut(key.len()); + key_out.copy_from_slice(key); + + let value_out = match facet_kind { + // We must take the facet group size into account + // when we serialize strings and numbers. 
+ FacetKind::String | FacetKind::Number => { + let (first, remaining) = value_out.split_first_mut().unwrap(); + *first = 1; + remaining + } + FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out, + }; + CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?; + + // We could commit only the used memory. + grant.commit(total_length); + + break Ok(()); + } + } + + pub fn delete(&self, key: &[u8]) -> crate::Result<()> { + let capacity = self.sender.capacity; + let refcell = self.sender.inner.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + + let (facet_kind, key) = FacetKind::extract_from_key(key); + let key_length = key.len().try_into().unwrap(); + + let total_length = EntryHeader::delete_key_size(key_length); + if total_length > capacity { + unreachable!("entry larger that the bbqueue capacity"); + } + + let payload_header = + EntryHeader::DbOperation { database: Database::from(facet_kind), key_length: None }; + + loop { + let mut grant = match producer.grant(total_length) { + Ok(grant) => grant, + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + }; + + let (header, remaining) = grant.split_at_mut(mem::size_of::()); + header.copy_from_slice(payload_header.bytes_of()); + remaining.copy_from_slice(key); + + // We could commit only the used memory. + grant.commit(total_length); + + break Ok(()); } } } -pub struct FieldIdDocidFacetSender<'a>(&'a ExtractorSender); +pub struct FieldIdDocidFacetSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); -impl FieldIdDocidFacetSender<'_> { - pub fn write_facet_string(&self, key: &[u8], value: &[u8]) -> StdResult<(), SendError<()>> { +impl FieldIdDocidFacetSender<'_, '_> { + pub fn write_facet_string(&self, key: &[u8], value: &[u8]) -> crate::Result<()> { debug_assert!(FieldDocIdFacetStringCodec::bytes_decode(key).is_ok()); - let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, value)); - self.0 - .send_db_operation(DbOperation { database: Database::FieldIdDocidFacetStrings, entry }) + self.0.write_key_value(Database::FieldIdDocidFacetStrings, key, value) } - pub fn write_facet_f64(&self, key: &[u8]) -> StdResult<(), SendError<()>> { + pub fn write_facet_f64(&self, key: &[u8]) -> crate::Result<()> { debug_assert!(FieldDocIdFacetF64Codec::bytes_decode(key).is_ok()); - let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value(key, &[])); - self.0.send_db_operation(DbOperation { database: Database::FieldIdDocidFacetF64s, entry }) + self.0.write_key_value(Database::FieldIdDocidFacetF64s, key, &[]) } - pub fn delete_facet_string(&self, key: &[u8]) -> StdResult<(), SendError<()>> { + pub fn delete_facet_string(&self, key: &[u8]) -> crate::Result<()> { debug_assert!(FieldDocIdFacetStringCodec::bytes_decode(key).is_ok()); - let entry = EntryOperation::Delete(KeyEntry::from_key(key)); - self.0 - .send_db_operation(DbOperation { database: Database::FieldIdDocidFacetStrings, entry }) + self.0.delete_entry(Database::FieldIdDocidFacetStrings, key) } - pub fn delete_facet_f64(&self, key: &[u8]) -> StdResult<(), SendError<()>> { + pub fn delete_facet_f64(&self, key: &[u8]) -> crate::Result<()> { debug_assert!(FieldDocIdFacetF64Codec::bytes_decode(key).is_ok()); - let entry = EntryOperation::Delete(KeyEntry::from_key(key)); - self.0.send_db_operation(DbOperation { database: Database::FieldIdDocidFacetF64s, entry }) + self.0.delete_entry(Database::FieldIdDocidFacetF64s, key) } } -pub struct DocumentsSender<'a>(&'a ExtractorSender); +pub struct 
DocumentsSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); -impl DocumentsSender<'_> { +impl DocumentsSender<'_, '_> { /// TODO do that efficiently pub fn uncompressed( &self, docid: DocumentId, external_id: String, document: &KvReaderFieldId, - ) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value( - &docid.to_be_bytes(), - document.as_bytes(), - )); - match self.0.send_db_operation(DbOperation { database: Database::Documents, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - }?; - - let entry = EntryOperation::Write(KeyValueEntry::from_small_key_value( + ) -> crate::Result<()> { + self.0.write_key_value(Database::Documents, &docid.to_be_bytes(), document.as_bytes())?; + self.0.write_key_value( + Database::ExternalDocumentsIds, external_id.as_bytes(), &docid.to_be_bytes(), - )); - match self - .0 - .send_db_operation(DbOperation { database: Database::ExternalDocumentsIds, entry }) - { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } + ) } - pub fn delete(&self, docid: DocumentId, external_id: String) -> StdResult<(), SendError<()>> { - let entry = EntryOperation::Delete(KeyEntry::from_key(&docid.to_be_bytes())); - match self.0.send_db_operation(DbOperation { database: Database::Documents, entry }) { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - }?; - + pub fn delete(&self, docid: DocumentId, external_id: String) -> crate::Result<()> { + self.0.delete_entry(Database::Documents, &docid.to_be_bytes())?; self.0.send_delete_vector(docid)?; - - let entry = EntryOperation::Delete(KeyEntry::from_key(external_id.as_bytes())); - match self - .0 - .send_db_operation(DbOperation { database: Database::ExternalDocumentsIds, entry }) - { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), - } + self.0.delete_entry(Database::ExternalDocumentsIds, external_id.as_bytes()) } } -pub struct EmbeddingSender<'a>(&'a Sender); +pub struct EmbeddingSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); -impl EmbeddingSender<'_> { +impl EmbeddingSender<'_, '_> { pub fn set_vectors( &self, docid: DocumentId, embedder_id: u8, embeddings: Vec, - ) -> StdResult<(), SendError<()>> { + ) -> crate::Result<()> { self.0 .send(WriterOperation::ArroyOperation(ArroyOperation::SetVectors { docid, @@ -541,33 +611,36 @@ impl EmbeddingSender<'_> { } } -pub struct GeoSender<'a>(&'a Sender); +pub struct GeoSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); -impl GeoSender<'_> { +impl GeoSender<'_, '_> { pub fn set_rtree(&self, value: Mmap) -> StdResult<(), SendError<()>> { - self.0 - .send(WriterOperation::DbOperation(DbOperation { - database: Database::Main, - entry: EntryOperation::Write(KeyValueEntry::from_large_key_value( - GEO_RTREE_KEY.as_bytes(), - value, - )), - })) - .map_err(|_| SendError(())) + todo!("set rtree from file") + // self.0 + // .send(WriterOperation::DbOperation(DbOperation { + // database: Database::Main, + // entry: EntryOperation::Write(KeyValueEntry::from_large_key_value( + // GEO_RTREE_KEY.as_bytes(), + // value, + // )), + // })) + // .map_err(|_| SendError(())) } pub fn set_geo_faceted(&self, bitmap: &RoaringBitmap) -> StdResult<(), SendError<()>> { - let mut buffer = Vec::new(); - bitmap.serialize_into(&mut buffer).unwrap(); + todo!("serialize directly into bbqueue (as a real roaringbitmap not a cbo)") - self.0 - .send(WriterOperation::DbOperation(DbOperation { - database: Database::Main, - entry: EntryOperation::Write(KeyValueEntry::from_small_key_value( - 
GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(), - &buffer, - )), - })) - .map_err(|_| SendError(())) + // let mut buffer = Vec::new(); + // bitmap.serialize_into(&mut buffer).unwrap(); + + // self.0 + // .send(WriterOperation::DbOperation(DbOperation { + // database: Database::Main, + // entry: EntryOperation::Write(KeyValueEntry::from_small_key_value( + // GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(), + // &buffer, + // )), + // })) + // .map_err(|_| SendError(())) } } diff --git a/crates/milli/src/update/new/extract/documents.rs b/crates/milli/src/update/new/extract/documents.rs index aeb1d5694..13307025a 100644 --- a/crates/milli/src/update/new/extract/documents.rs +++ b/crates/milli/src/update/new/extract/documents.rs @@ -12,13 +12,14 @@ use crate::update::new::thread_local::FullySend; use crate::update::new::DocumentChange; use crate::vector::EmbeddingConfigs; use crate::Result; -pub struct DocumentsExtractor<'a> { - document_sender: &'a DocumentsSender<'a>, + +pub struct DocumentsExtractor<'a, 'b> { + document_sender: DocumentsSender<'a, 'b>, embedders: &'a EmbeddingConfigs, } -impl<'a> DocumentsExtractor<'a> { - pub fn new(document_sender: &'a DocumentsSender<'a>, embedders: &'a EmbeddingConfigs) -> Self { +impl<'a, 'b> DocumentsExtractor<'a, 'b> { + pub fn new(document_sender: DocumentsSender<'a, 'b>, embedders: &'a EmbeddingConfigs) -> Self { Self { document_sender, embedders } } } @@ -29,7 +30,7 @@ pub struct DocumentExtractorData { pub field_distribution_delta: HashMap, } -impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> { +impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> { type Data = FullySend>; fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result { diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index 8ac73a8d7..52b13f37d 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -20,7 +20,7 @@ use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAb pub struct EmbeddingExtractor<'a> { embedders: &'a EmbeddingConfigs, - sender: &'a EmbeddingSender<'a>, + sender: EmbeddingSender<'a>, possible_embedding_mistakes: PossibleEmbeddingMistakes, threads: &'a ThreadPoolNoAbort, } @@ -28,7 +28,7 @@ pub struct EmbeddingExtractor<'a> { impl<'a> EmbeddingExtractor<'a> { pub fn new( embedders: &'a EmbeddingConfigs, - sender: &'a EmbeddingSender<'a>, + sender: EmbeddingSender<'a>, field_distribution: &'a FieldDistribution, threads: &'a ThreadPoolNoAbort, ) -> Self { @@ -368,7 +368,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> { possible_embedding_mistakes: &PossibleEmbeddingMistakes, unused_vectors_distribution: &UnusedVectorsDistributionBump, threads: &ThreadPoolNoAbort, - sender: &EmbeddingSender<'a>, + sender: EmbeddingSender<'a>, has_manual_generation: Option<&'a str>, ) -> Result<()> { if let Some(external_docid) = has_manual_generation { diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 35dea7a98..88a4c2f77 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -76,7 +76,11 @@ where MSP: Fn() -> bool + Sync, SP: Fn(Progress) + Sync, { - let (extractor_sender, writer_receiver) = extractor_writer_channel(10_000); + /// TODO restrict memory and remove this memory from the extractors bum allocators + let bbbuffers: Vec<_> = (0..rayon::current_num_threads()) + .map(|_| 
bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread + .collect(); + let (extractor_sender, writer_receiver) = extractor_writer_bbqueue(&bbbuffers); let finished_extraction = AtomicBool::new(false); let metadata_builder = MetadataBuilder::from_index(index, wtxn)?; @@ -115,7 +119,7 @@ where // document but we need to create a function that collects and compresses documents. let document_sender = extractor_sender.documents(); - let document_extractor = DocumentsExtractor::new(&document_sender, embedders); + let document_extractor = DocumentsExtractor::new(document_sender, embedders); let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); { let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents"); diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index 039c56b9d..f2809b376 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -19,7 +19,7 @@ pub fn merge_and_send_rtree<'extractor, MSP>( datastore: impl IntoIterator>>, rtxn: &RoTxn, index: &Index, - geo_sender: GeoSender<'_>, + geo_sender: GeoSender<'_, '_>, must_stop_processing: &MSP, ) -> Result<()> where @@ -62,19 +62,19 @@ where } #[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")] -pub fn merge_and_send_docids<'extractor, MSP>( +pub fn merge_and_send_docids<'extractor, MSP, D>( mut caches: Vec>, database: Database, index: &Index, - docids_sender: impl DocidsSender + Sync, + docids_sender: WordDocidsSender, must_stop_processing: &MSP, ) -> Result<()> where MSP: Fn() -> bool + Sync, + D: DatabaseType + Sync, { transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| { let rtxn = index.read_txn()?; - let mut buffer = Vec::new(); if must_stop_processing() { return Err(InternalError::AbortedIndexation.into()); } @@ -82,8 +82,7 @@ where let current = database.get(&rtxn, key)?; match merge_cbo_bitmaps(current, del, add)? { Operation::Write(bitmap) => { - let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer); - docids_sender.write(key, value).unwrap(); + docids_sender.write(key, &bitmap).unwrap(); Ok(()) } Operation::Delete => { @@ -101,21 +100,19 @@ pub fn merge_and_send_facet_docids<'extractor>( mut caches: Vec>, database: FacetDatabases, index: &Index, - docids_sender: impl DocidsSender + Sync, + docids_sender: FacetDocidsSender, ) -> Result { transpose_and_freeze_caches(&mut caches)? .into_par_iter() .map(|frozen| { let mut facet_field_ids_delta = FacetFieldIdsDelta::default(); let rtxn = index.read_txn()?; - let mut buffer = Vec::new(); merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| { let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?; match merge_cbo_bitmaps(current, del, add)? 
{ Operation::Write(bitmap) => { facet_field_ids_delta.register_from_key(key); - let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer); - docids_sender.write(key, value).unwrap(); + docids_sender.write(key, &bitmap).unwrap(); Ok(()) } Operation::Delete => { From 2094ce8a9a8febbab73efdeb0c477cda1c9c67c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 10:19:59 +0100 Subject: [PATCH 023/158] Move the arroy building after the writing loop --- crates/milli/src/update/new/indexer/mod.rs | 81 +++++++++++----------- 1 file changed, 40 insertions(+), 41 deletions(-) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 88a4c2f77..f82f4af37 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -76,7 +76,7 @@ where MSP: Fn() -> bool + Sync, SP: Fn(Progress) + Sync, { - /// TODO restrict memory and remove this memory from the extractors bum allocators + /// TODO restrict memory and remove this memory from the extractors bump allocators let bbbuffers: Vec<_> = (0..rayon::current_num_threads()) .map(|_| bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread .collect(); @@ -100,6 +100,7 @@ where send_progress, }; + let mut index_embeddings = index.embedding_configs(wtxn)?; let mut field_distribution = index.field_distribution(wtxn)?; let mut document_ids = index.documents_ids(wtxn)?; @@ -296,7 +297,6 @@ where 'vectors: { - let mut index_embeddings = index.embedding_configs(&rtxn)?; if index_embeddings.is_empty() { break 'vectors; } @@ -322,8 +322,6 @@ where } } } - - embedding_sender.finish(index_embeddings).unwrap(); } 'geo: { @@ -457,46 +455,47 @@ where embeddings.append(embedding).unwrap(); } - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_items(wtxn, docid, &embeddings)?; - } - ArroyOperation::SetVector { docid, embedder_id, embedding } => { - let (_, _, writer, dimensions) = arroy_writers - .get(&embedder_id) - .expect("requested a missing embedder"); - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_item(wtxn, docid, &embedding)?; - } - ArroyOperation::Finish { configs } => { - let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build"); - let _entered = span.enter(); - - (indexing_context.send_progress)(Progress::from_step( - Step::WritingEmbeddingsToDatabase, - )); - - for ( - _embedder_index, - (_embedder_name, _embedder, writer, dimensions), - ) in &mut arroy_writers - { - let dimensions = *dimensions; - writer.build_and_quantize( - wtxn, - &mut rng, - dimensions, - false, - &indexing_context.must_stop_processing, - )?; - } - - index.put_embedding_configs(wtxn, configs)?; - } - }, - } + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_items(wtxn, docid, &embeddings)?; + } + ArroyOperation::SetVector { docid, embedder_id, embedding } => { + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_item(wtxn, docid, &embedding)?; + } + _otherwise => unreachable!(), + }, } } + 'vectors: { + let span = + tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build"); + let _entered = span.enter(); + + if index_embeddings.is_empty() { + break 'vectors; + } + + (indexing_context.send_progress)(Progress::from_step( + Step::WritingEmbeddingsToDatabase, + )); + + for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers { + 
let dimensions = *dimensions; + writer.build_and_quantize( + wtxn, + &mut rng, + dimensions, + false, + &indexing_context.must_stop_processing, + )?; + } + + index.put_embedding_configs(wtxn, index_embeddings)?; + } + (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); let facet_field_ids_delta = extractor_handle.join().unwrap()?; From e1e76f39d044d083bd7bf0552cc20b36d948af7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 13:03:39 +0100 Subject: [PATCH 024/158] Clean up dependencies --- Cargo.lock | 21 ++++----------------- Cargo.toml | 3 --- crates/benchmarks/Cargo.toml | 2 +- crates/dump/Cargo.toml | 2 +- crates/index-scheduler/Cargo.toml | 4 ++-- crates/index-scheduler/src/lib.rs | 10 +++++----- crates/meilisearch-auth/Cargo.toml | 2 +- crates/meilisearch-types/Cargo.toml | 2 +- crates/meilisearch/Cargo.toml | 2 +- crates/milli/Cargo.toml | 3 +-- 10 files changed, 17 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e2069db87..8a0a6b3d0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1251,19 +1251,6 @@ dependencies = [ "itertools 0.10.5", ] -[[package]] -name = "crossbeam" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-epoch", - "crossbeam-queue", - "crossbeam-utils", -] - [[package]] name = "crossbeam-channel" version = "0.5.13" @@ -2621,7 +2608,7 @@ dependencies = [ "big_s", "bincode", "bumpalo", - "crossbeam", + "crossbeam-channel", "csv", "derive_builder 0.20.0", "dump", @@ -3629,7 +3616,6 @@ dependencies = [ "candle-transformers", "charabia", "concat-arrays", - "crossbeam", "crossbeam-channel", "csv", "deserr", @@ -4750,8 +4736,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.6" -source = "git+https://github.com/RoaringBitmap/roaring-rs?branch=clone-iter-slice#8ff028e484fb6192a0acf5a669eaf18c30cada6e" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f81dc953b2244ddd5e7860cb0bb2a790494b898ef321d4aff8e260efab60cc88" dependencies = [ "bytemuck", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 5e53dbfa5..89a17d8fc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,3 @@ opt-level = 3 opt-level = 3 [profile.dev.package.roaring] opt-level = 3 - -[patch.crates-io] -roaring = { git = "https://github.com/RoaringBitmap/roaring-rs", branch = "clone-iter-slice" } diff --git a/crates/benchmarks/Cargo.toml b/crates/benchmarks/Cargo.toml index eec30ea3f..ccd256546 100644 --- a/crates/benchmarks/Cargo.toml +++ b/crates/benchmarks/Cargo.toml @@ -24,7 +24,7 @@ tempfile = "3.14.0" criterion = { version = "0.5.1", features = ["html_reports"] } rand = "0.8.5" rand_chacha = "0.3.1" -roaring = "0.10.6" +roaring = "0.10.7" [build-dependencies] anyhow = "1.0.86" diff --git a/crates/dump/Cargo.toml b/crates/dump/Cargo.toml index f9d2a9a0b..679a97b4e 100644 --- a/crates/dump/Cargo.toml +++ b/crates/dump/Cargo.toml @@ -17,7 +17,7 @@ http = "1.1.0" meilisearch-types = { path = "../meilisearch-types" } once_cell = "1.19.0" regex = "1.10.5" -roaring = { version = "0.10.6", features = ["serde"] } +roaring = { version = "0.10.7", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde_json = { version = "1.0.120", features = ["preserve_order"] } tar = "0.4.41" diff --git a/crates/index-scheduler/Cargo.toml 
b/crates/index-scheduler/Cargo.toml index 657dd6dfe..ad4c1b4b9 100644 --- a/crates/index-scheduler/Cargo.toml +++ b/crates/index-scheduler/Cargo.toml @@ -24,7 +24,7 @@ meilisearch-types = { path = "../meilisearch-types" } page_size = "0.6.0" raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" } rayon = "1.10.0" -roaring = { version = "0.10.6", features = ["serde"] } +roaring = { version = "0.10.7", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde_json = { version = "1.0.120", features = ["preserve_order"] } synchronoise = "1.0.1" @@ -45,7 +45,7 @@ bumpalo = "3.16.0" [dev-dependencies] arroy = "0.5.0" big_s = "1.0.2" -crossbeam = "0.8.4" +crossbeam-channel = "0.5.13" insta = { version = "1.39.0", features = ["json", "redactions"] } maplit = "1.0.2" meili-snap = { path = "../meili-snap" } diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index cef24c1ea..1a1c71bae 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -407,7 +407,7 @@ pub struct IndexScheduler { /// /// See [self.breakpoint()](`IndexScheduler::breakpoint`) for an explanation. #[cfg(test)] - test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>, + test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>, /// A list of planned failures within the [`tick`](IndexScheduler::tick) method of the index scheduler. /// @@ -476,7 +476,7 @@ impl IndexScheduler { /// Create an index scheduler and start its run loop. pub fn new( options: IndexSchedulerOptions, - #[cfg(test)] test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>, + #[cfg(test)] test_breakpoint_sdr: crossbeam_channel::Sender<(Breakpoint, bool)>, #[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>, ) -> Result { std::fs::create_dir_all(&options.tasks_path)?; @@ -2237,7 +2237,7 @@ mod tests { use std::time::Instant; use big_s::S; - use crossbeam::channel::RecvTimeoutError; + use crossbeam_channel::RecvTimeoutError; use file_store::File; use insta::assert_json_snapshot; use maplit::btreeset; @@ -2289,7 +2289,7 @@ mod tests { configuration: impl Fn(&mut IndexSchedulerOptions), ) -> (Self, IndexSchedulerHandle) { let tempdir = TempDir::new().unwrap(); - let (sender, receiver) = crossbeam::channel::bounded(0); + let (sender, receiver) = crossbeam_channel::bounded(0); let indexer_config = IndexerConfig { skip_index_budget: true, ..Default::default() }; @@ -2421,7 +2421,7 @@ mod tests { pub struct IndexSchedulerHandle { _tempdir: TempDir, index_scheduler: IndexScheduler, - test_breakpoint_rcv: crossbeam::channel::Receiver<(Breakpoint, bool)>, + test_breakpoint_rcv: crossbeam_channel::Receiver<(Breakpoint, bool)>, last_breakpoint: Breakpoint, } diff --git a/crates/meilisearch-auth/Cargo.toml b/crates/meilisearch-auth/Cargo.toml index ae0095ab4..591a40158 100644 --- a/crates/meilisearch-auth/Cargo.toml +++ b/crates/meilisearch-auth/Cargo.toml @@ -17,7 +17,7 @@ hmac = "0.12.1" maplit = "1.0.2" meilisearch-types = { path = "../meilisearch-types" } rand = "0.8.5" -roaring = { version = "0.10.6", features = ["serde"] } +roaring = { version = "0.10.7", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde_json = { version = "1.0.120", features = ["preserve_order"] } sha2 = "0.10.8" diff --git a/crates/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml index 349c06080..aca06a018 100644 --- a/crates/meilisearch-types/Cargo.toml +++ 
b/crates/meilisearch-types/Cargo.toml @@ -25,7 +25,7 @@ fst = "0.4.7" memmap2 = "0.9.4" milli = { path = "../milli" } raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" } -roaring = { version = "0.10.6", features = ["serde"] } +roaring = { version = "0.10.7", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde-cs = "0.2.4" serde_json = "1.0.120" diff --git a/crates/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml index 2884f0c9c..8e134ebd0 100644 --- a/crates/meilisearch/Cargo.toml +++ b/crates/meilisearch/Cargo.toml @@ -103,7 +103,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] } tracing-trace = { version = "0.1.0", path = "../tracing-trace" } tracing-actix-web = "0.7.11" build-info = { version = "1.7.0", path = "../build-info" } -roaring = "0.10.2" +roaring = "0.10.7" mopa-maintained = "0.2.3" [dev-dependencies] diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index 798a4ea19..b66dec9a4 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -42,7 +42,7 @@ obkv = "0.3.0" once_cell = "1.19.0" ordered-float = "4.2.1" rayon = "1.10.0" -roaring = { version = "0.10.6", features = ["serde"] } +roaring = { version = "0.10.7", features = ["serde"] } rstar = { version = "0.12.0", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde_json = { version = "1.0.120", features = ["preserve_order", "raw_value"] } @@ -99,7 +99,6 @@ rustc-hash = "2.0.0" uell = "0.1.0" enum-iterator = "2.1.0" bbqueue = { git = "https://github.com/kerollmops/bbqueue" } -crossbeam = "0.8.4" [dev-dependencies] mimalloc = { version = "0.1.43", default-features = false } From 6ac5b3b136086b8b25b1eb8dc2d6678e39846262 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 13:36:30 +0100 Subject: [PATCH 025/158] Finish most of the channels types --- crates/milli/src/error.rs | 9 +- crates/milli/src/update/new/channel.rs | 662 +++++++++++------- .../src/update/new/extract/vectors/mod.rs | 2 +- crates/milli/src/update/new/indexer/mod.rs | 132 ++-- 4 files changed, 474 insertions(+), 331 deletions(-) diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 4da57a3e1..800dfa375 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -62,9 +62,14 @@ pub enum InternalError { #[error(transparent)] Store(#[from] MdbError), #[error("Cannot delete {key:?} from database {database_name}: {error}")] - StoreDeletion { database_name: &'static str, key: Vec, error: heed::Error }, + StoreDeletion { database_name: &'static str, key: Box<[u8]>, error: heed::Error }, #[error("Cannot insert {key:?} and value with length {value_length} into database {database_name}: {error}")] - StorePut { database_name: &'static str, key: Vec, value_length: usize, error: heed::Error }, + StorePut { + database_name: &'static str, + key: Box<[u8]>, + value_length: usize, + error: heed::Error, + }, #[error(transparent)] Utf8(#[from] str::Utf8Error), #[error("An indexation process was explicitly aborted")] diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index cacc7b129..d2681c915 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -1,12 +1,11 @@ use std::cell::RefCell; use std::marker::PhantomData; +use std::mem; use std::num::NonZeroU16; -use std::{mem, slice}; use bbqueue::framed::{FrameGrantR, FrameProducer}; -use bytemuck::{NoUninit, 
CheckedBitPattern}; -use crossbeam::sync::{Parker, Unparker}; -use crossbeam_channel::{IntoIter, Receiver, SendError}; +use bytemuck::{checked, CheckedBitPattern, NoUninit}; +use crossbeam_channel::SendError; use heed::types::Bytes; use heed::BytesDecode; use memmap2::Mmap; @@ -17,21 +16,32 @@ use super::ref_cell_ext::RefCellExt; use super::thread_local::{FullySend, ThreadLocal}; use super::StdResult; use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; +use crate::index::db_name; use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY}; -use crate::index::{db_name, IndexEmbeddingConfig}; use crate::update::new::KvReaderFieldId; use crate::vector::Embedding; use crate::{CboRoaringBitmapCodec, DocumentId, Index}; -/// Creates a tuple of producer/receivers to be used by +/// Creates a tuple of senders/receiver to be used by /// the extractors and the writer loop. /// +/// The `channel_capacity` parameter defines the number of +/// too-large-to-fit-in-BBQueue entries that can be sent through +/// a crossbeam channel. This parameter must stay low to make +/// sure we do not use too much memory. +/// +/// Note that the channel is also used to wake-up the receiver +/// wehn new stuff is available in any BBQueue buffer but we send +/// a message in this queue only if it is empty to avoid filling +/// the channel *and* the BBQueue. +/// /// # Safety /// -/// Panics if the number of provided bbqueue is not exactly equal +/// Panics if the number of provided BBQueues is not exactly equal /// to the number of available threads in the rayon threadpool. pub fn extractor_writer_bbqueue( bbbuffers: &[bbqueue::BBBuffer], + channel_capacity: usize, ) -> (ExtractorBbqueueSender, WriterBbqueueReceiver) { assert_eq!( bbbuffers.len(), @@ -40,88 +50,252 @@ pub fn extractor_writer_bbqueue( ); let capacity = bbbuffers.first().unwrap().capacity(); - let parker = Parker::new(); - let extractors = ThreadLocal::with_capacity(bbbuffers.len()); - let producers = rayon::broadcast(|bi| { + // Read the field description to understand this + let capacity = capacity.checked_sub(9).unwrap(); + + let producers = ThreadLocal::with_capacity(bbbuffers.len()); + let consumers = rayon::broadcast(|bi| { let bbqueue = &bbbuffers[bi.index()]; let (producer, consumer) = bbqueue.try_split_framed().unwrap(); - extractors.get_or(|| FullySend(RefCell::new(producer))); + producers.get_or(|| FullySend(RefCell::new(producer))); consumer }); - ( - ExtractorBbqueueSender { - inner: extractors, - capacity: capacity.checked_sub(9).unwrap(), - unparker: parker.unparker().clone(), - }, - WriterBbqueueReceiver { inner: producers, parker }, - ) + let (sender, receiver) = crossbeam_channel::bounded(channel_capacity); + let sender = ExtractorBbqueueSender { sender, producers, capacity }; + let receiver = WriterBbqueueReceiver { receiver, consumers }; + (sender, receiver) +} + +pub struct ExtractorBbqueueSender<'a> { + /// This channel is used to wake-up the receiver and + /// send large entries that cannot fit in the BBQueue. + sender: crossbeam_channel::Sender, + /// A memory buffer, one by thread, is used to serialize + /// the entries directly in this shared, lock-free space. + producers: ThreadLocal>>>, + /// The capacity of this frame producer, will never be able to store more than that. + /// + /// Note that the FrameProducer requires up to 9 bytes to encode the length, + /// the capacity has been shrinked accordingly. 
+ /// + /// + capacity: usize, } pub struct WriterBbqueueReceiver<'a> { - inner: Vec>, - /// Used to park when no more work is required - parker: Parker, + /// Used to wake up when new entries are available either in + /// any BBQueue buffer or directly sent throught this channel + /// (still written to disk). + receiver: crossbeam_channel::Receiver, + /// The BBQueue frames to read when waking-up. + consumers: Vec>, +} + +/// The action to perform on the receiver/writer side. +pub enum ReceiverAction { + /// Wake up, you have frames to read for the BBQueue buffers. + WakeUp, + /// An entry that cannot fit in the BBQueue buffers has been + /// written to disk, memory-mapped and must be written in the + /// database. + LargeEntry { + /// The database where the entry must be written. + database: Database, + /// The key of the entry that must be written in the database. + key: Box<[u8]>, + /// The large value that must be written. + /// + /// Note: We can probably use a `File` here and + /// use `Database::put_reserved` instead of memory-mapping. + value: Mmap, + }, } impl<'a> WriterBbqueueReceiver<'a> { + pub fn recv(&mut self) -> Option { + self.receiver.recv().ok() + } + pub fn read(&mut self) -> Option> { - loop { - for consumer in &mut self.inner { - // mark the frame as auto release - if let Some() = consumer.read() + for consumer in &mut self.consumers { + if let Some(frame) = consumer.read() { + return Some(FrameWithHeader::from(frame)); } - break None; } + None } } -struct FrameWithHeader<'a> { +pub struct FrameWithHeader<'a> { header: EntryHeader, frame: FrameGrantR<'a>, } -#[derive(Debug, Clone, Copy, CheckedBitPattern)] -#[repr(u8)] -enum EntryHeader { - /// Wether a put of the key/value pair or a delete of the given key. - DbOperation { - /// The database on which to perform the operation. - database: Database, - /// The key length in the buffer. - /// - /// If None it means that the buffer is dedicated - /// to the key and it is therefore a deletion operation. - key_length: Option, - }, - ArroyDeleteVector { - docid: DocumentId, - }, - /// The embedding is the remaining space and represents a non-aligned [f32]. - ArroySetVector { - docid: DocumentId, - embedder_id: u8, - }, +impl FrameWithHeader<'_> { + pub fn header(&self) -> EntryHeader { + self.header + } + + pub fn frame(&self) -> &FrameGrantR<'_> { + &self.frame + } } -impl EntryHeader { - fn delete_key_size(key_length: u16) -> usize { - mem::size_of::() + key_length as usize - } - - fn put_key_value_size(key_length: u16, value_length: usize) -> usize { - mem::size_of::() + key_length as usize + value_length - } - - fn bytes_of(&self) -> &[u8] { - /// TODO do the variant matching ourselves - todo!() +impl<'a> From> for FrameWithHeader<'a> { + fn from(mut frame: FrameGrantR<'a>) -> Self { + frame.auto_release(true); + FrameWithHeader { header: EntryHeader::from_slice(&frame[..]), frame } } } #[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] -#[repr(u32)] +#[repr(C)] +/// Wether a put of the key/value pair or a delete of the given key. +pub struct DbOperation { + /// The database on which to perform the operation. + pub database: Database, + /// The key length in the buffer. + /// + /// If None it means that the buffer is dedicated + /// to the key and it is therefore a deletion operation. 
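+    /// (`key_value` below splits the frame according to this field.)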
+ pub key_length: Option, +} + +impl DbOperation { + pub fn key_value<'a>(&self, frame: &'a FrameGrantR<'_>) -> (&'a [u8], Option<&'a [u8]>) { + /// TODO replace the return type by an enum Write | Delete + let skip = EntryHeader::variant_size() + mem::size_of::(); + match self.key_length { + Some(key_length) => { + let (key, value) = frame[skip..].split_at(key_length.get() as usize); + (key, Some(value)) + } + None => (&frame[skip..], None), + } + } +} + +#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] +#[repr(transparent)] +pub struct ArroyDeleteVector { + pub docid: DocumentId, +} + +#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] +#[repr(C)] +/// The embedding is the remaining space and represents a non-aligned [f32]. +pub struct ArroySetVector { + pub docid: DocumentId, + pub embedder_id: u8, + _padding: [u8; 3], +} + +impl ArroySetVector { + pub fn read_embedding_into_vec<'v>( + &self, + frame: &FrameGrantR<'_>, + vec: &'v mut Vec, + ) -> &'v [f32] { + vec.clear(); + let skip = EntryHeader::variant_size() + mem::size_of::(); + let bytes = &frame[skip..]; + bytes.chunks_exact(mem::size_of::()).for_each(|bytes| { + let f = bytes.try_into().map(f32::from_ne_bytes).unwrap(); + vec.push(f); + }); + &vec[..] + } +} + +#[derive(Debug, Clone, Copy)] +#[repr(u8)] +pub enum EntryHeader { + DbOperation(DbOperation), + ArroyDeleteVector(ArroyDeleteVector), + ArroySetVector(ArroySetVector), +} + +impl EntryHeader { + const fn variant_size() -> usize { + mem::size_of::() + } + + const fn variant_id(&self) -> u8 { + match self { + EntryHeader::DbOperation(_) => 0, + EntryHeader::ArroyDeleteVector(_) => 1, + EntryHeader::ArroySetVector(_) => 2, + } + } + + const fn total_key_value_size(key_length: NonZeroU16, value_length: usize) -> usize { + Self::variant_size() + + mem::size_of::() + + key_length.get() as usize + + value_length + } + + const fn total_key_size(key_length: NonZeroU16) -> usize { + Self::total_key_value_size(key_length, 0) + } + + const fn total_delete_vector_size() -> usize { + Self::variant_size() + mem::size_of::() + } + + /// The `embedding_length` corresponds to the number of `f32` in the embedding. 
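+    /// It is a dimension count, not a byte count; the payload occupies `embedding_length * size_of::<f32>()` bytes.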
+ fn total_set_vector_size(embedding_length: usize) -> usize { + Self::variant_size() + + mem::size_of::() + + embedding_length * mem::size_of::() + } + + fn header_size(&self) -> usize { + let payload_size = match self { + EntryHeader::DbOperation(op) => mem::size_of_val(op), + EntryHeader::ArroyDeleteVector(adv) => mem::size_of_val(adv), + EntryHeader::ArroySetVector(asv) => mem::size_of_val(asv), + }; + Self::variant_size() + payload_size + } + + fn from_slice(slice: &[u8]) -> EntryHeader { + let (variant_id, remaining) = slice.split_first().unwrap(); + match variant_id { + 0 => { + let header_bytes = &remaining[..mem::size_of::()]; + let header = checked::pod_read_unaligned(header_bytes); + EntryHeader::DbOperation(header) + } + 1 => { + let header_bytes = &remaining[..mem::size_of::()]; + let header = checked::pod_read_unaligned(header_bytes); + EntryHeader::ArroyDeleteVector(header) + } + 2 => { + let header_bytes = &remaining[..mem::size_of::()]; + let header = checked::pod_read_unaligned(header_bytes); + EntryHeader::ArroySetVector(header) + } + id => panic!("invalid variant id: {id}"), + } + } + + fn serialize_into(&self, header_bytes: &mut [u8]) { + let (first, remaining) = header_bytes.split_first_mut().unwrap(); + let payload_bytes = match self { + EntryHeader::DbOperation(op) => bytemuck::bytes_of(op), + EntryHeader::ArroyDeleteVector(adv) => bytemuck::bytes_of(adv), + EntryHeader::ArroySetVector(asv) => bytemuck::bytes_of(asv), + }; + *first = self.variant_id(); + remaining.copy_from_slice(payload_bytes); + } +} + +#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] +#[repr(u16)] pub enum Database { Main, Documents, @@ -197,20 +371,6 @@ impl From for Database { } } -pub struct ExtractorBbqueueSender<'a> { - inner: ThreadLocal>>>, - /// The capacity of this frame producer, will never be able to store more than that. - /// - /// Note that the FrameProducer requires up to 9 bytes to encode the length, - /// the capacity has been shrinked accordingly. - /// - /// - capacity: usize, - /// Used to wake up the receiver thread, - /// Used everytime we write something in the producer. - unparker: Unparker, -} - impl<'b> ExtractorBbqueueSender<'b> { pub fn docids<'a, D: DatabaseType>(&'a self) -> WordDocidsSender<'a, 'b, D> { WordDocidsSender { sender: self, _marker: PhantomData } @@ -236,80 +396,171 @@ impl<'b> ExtractorBbqueueSender<'b> { GeoSender(&self) } - fn send_delete_vector(&self, docid: DocumentId) -> crate::Result<()> { - match self - .sender - .send(WriterOperation::ArroyOperation(ArroyOperation::DeleteVectors { docid })) - { - Ok(()) => Ok(()), - Err(SendError(_)) => Err(SendError(())), + fn delete_vector(&self, docid: DocumentId) -> crate::Result<()> { + let capacity = self.capacity; + let refcell = self.producers.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + + let payload_header = EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }); + let total_length = EntryHeader::total_delete_vector_size(); + if total_length > capacity { + unreachable!("entry larger that the BBQueue capacity"); } + + // Spin loop to have a frame the size we requested. + let mut grant = loop { + match producer.grant(total_length) { + Ok(grant) => break grant, + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + } + }; + + payload_header.serialize_into(&mut grant); + + // We could commit only the used memory. 
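+        // Committing publishes the whole frame, header included, to the consumer side.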
+ grant.commit(total_length); + + Ok(()) + } + + fn set_vector( + &self, + docid: DocumentId, + embedder_id: u8, + embedding: &[f32], + ) -> crate::Result<()> { + let capacity = self.capacity; + let refcell = self.producers.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + + let payload_header = + EntryHeader::ArroySetVector(ArroySetVector { docid, embedder_id, _padding: [0; 3] }); + let total_length = EntryHeader::total_set_vector_size(embedding.len()); + if total_length > capacity { + unreachable!("entry larger that the BBQueue capacity"); + } + + // Spin loop to have a frame the size we requested. + let mut grant = loop { + match producer.grant(total_length) { + Ok(grant) => break grant, + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + } + }; + + // payload_header.serialize_into(&mut grant); + let header_size = payload_header.header_size(); + let (header_bytes, remaining) = grant.split_at_mut(header_size); + payload_header.serialize_into(header_bytes); + remaining.copy_from_slice(bytemuck::cast_slice(embedding)); + + // We could commit only the used memory. + grant.commit(total_length); + + Ok(()) } fn write_key_value(&self, database: Database, key: &[u8], value: &[u8]) -> crate::Result<()> { + let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); + self.write_key_value_with(database, key_length, value.len(), |buffer| { + let (key_buffer, value_buffer) = buffer.split_at_mut(key.len()); + key_buffer.copy_from_slice(key); + value_buffer.copy_from_slice(value); + Ok(()) + }) + } + + fn write_key_value_with( + &self, + database: Database, + key_length: NonZeroU16, + value_length: usize, + key_value_writer: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut [u8]) -> crate::Result<()>, + { let capacity = self.capacity; - let refcell = self.inner.get().unwrap(); + let refcell = self.producers.get().unwrap(); let mut producer = refcell.0.borrow_mut_or_yield(); - let key_length = key.len().try_into().unwrap(); - let value_length = value.len(); - let total_length = EntryHeader::put_key_value_size(key_length, value_length); + let operation = DbOperation { database, key_length: Some(key_length) }; + let payload_header = EntryHeader::DbOperation(operation); + let total_length = EntryHeader::total_key_value_size(key_length, value_length); if total_length > capacity { - unreachable!("entry larger that the bbqueue capacity"); + unreachable!("entry larger that the BBQueue capacity"); } - let payload_header = - EntryHeader::DbOperation { database, key_length: NonZeroU16::new(key_length) }; - - loop { - let mut grant = match producer.grant(total_length) { - Ok(grant) => grant, + // Spin loop to have a frame the size we requested. + let mut grant = loop { + match producer.grant(total_length) { + Ok(grant) => break grant, Err(bbqueue::Error::InsufficientSize) => continue, Err(e) => unreachable!("{e:?}"), - }; + } + }; - let (header, remaining) = grant.split_at_mut(mem::size_of::()); - header.copy_from_slice(payload_header.bytes_of()); - let (key_out, value_out) = remaining.split_at_mut(key.len()); - key_out.copy_from_slice(key); - value_out.copy_from_slice(value); + let header_size = payload_header.header_size(); + let (header_bytes, remaining) = grant.split_at_mut(header_size); + payload_header.serialize_into(header_bytes); + key_value_writer(remaining)?; - // We could commit only the used memory. - grant.commit(total_length); + // We could commit only the used memory. 
+ grant.commit(total_length); - break Ok(()); - } + Ok(()) } fn delete_entry(&self, database: Database, key: &[u8]) -> crate::Result<()> { + let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); + self.delete_entry_with(database, key_length, |buffer| { + buffer.copy_from_slice(key); + Ok(()) + }) + } + + fn delete_entry_with( + &self, + database: Database, + key_length: NonZeroU16, + key_writer: F, + ) -> crate::Result<()> + where + F: FnOnce(&mut [u8]) -> crate::Result<()>, + { let capacity = self.capacity; - let refcell = self.inner.get().unwrap(); + let refcell = self.producers.get().unwrap(); let mut producer = refcell.0.borrow_mut_or_yield(); - let key_length = key.len().try_into().unwrap(); - let total_length = EntryHeader::delete_key_size(key_length); + // For deletion we do not specify the key length, + // it's in the remaining bytes. + let operation = DbOperation { database, key_length: None }; + let payload_header = EntryHeader::DbOperation(operation); + let total_length = EntryHeader::total_key_size(key_length); if total_length > capacity { - unreachable!("entry larger that the bbqueue capacity"); + unreachable!("entry larger that the BBQueue capacity"); } - let payload_header = EntryHeader::DbOperation { database, key_length: None }; - - loop { - let mut grant = match producer.grant(total_length) { - Ok(grant) => grant, + // Spin loop to have a frame the size we requested. + let mut grant = loop { + match producer.grant(total_length) { + Ok(grant) => break grant, Err(bbqueue::Error::InsufficientSize) => continue, Err(e) => unreachable!("{e:?}"), - }; + } + }; - let (header, remaining) = grant.split_at_mut(mem::size_of::()); - header.copy_from_slice(payload_header.bytes_of()); - remaining.copy_from_slice(key); + let header_size = payload_header.header_size(); + let (header_bytes, remaining) = grant.split_at_mut(header_size); + payload_header.serialize_into(header_bytes); + key_writer(remaining)?; - // We could commit only the used memory. - grant.commit(total_length); + // We could commit only the used memory. + grant.commit(total_length); - break Ok(()); - } + Ok(()) } } @@ -355,72 +606,18 @@ pub struct WordDocidsSender<'a, 'b, D> { impl WordDocidsSender<'_, '_, D> { pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> { - let capacity = self.sender.capacity; - let refcell = self.sender.inner.get().unwrap(); - let mut producer = refcell.0.borrow_mut_or_yield(); - - let key_length = key.len().try_into().unwrap(); + let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); let value_length = CboRoaringBitmapCodec::serialized_size(bitmap); - - let total_length = EntryHeader::put_key_value_size(key_length, value_length); - if total_length > capacity { - unreachable!("entry larger that the bbqueue capacity"); - } - - let payload_header = EntryHeader::DbOperation { - database: D::DATABASE, - key_length: NonZeroU16::new(key_length), - }; - - loop { - let mut grant = match producer.grant(total_length) { - Ok(grant) => grant, - Err(bbqueue::Error::InsufficientSize) => continue, - Err(e) => unreachable!("{e:?}"), - }; - - let (header, remaining) = grant.split_at_mut(mem::size_of::()); - header.copy_from_slice(payload_header.bytes_of()); - let (key_out, value_out) = remaining.split_at_mut(key.len()); - key_out.copy_from_slice(key); - CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?; - - // We could commit only the used memory. 
- grant.commit(total_length); - - break Ok(()); - } + self.sender.write_key_value_with(D::DATABASE, key_length, value_length, |buffer| { + let (key_buffer, value_buffer) = buffer.split_at_mut(key.len()); + key_buffer.copy_from_slice(key); + CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_buffer)?; + Ok(()) + }) } pub fn delete(&self, key: &[u8]) -> crate::Result<()> { - let capacity = self.sender.capacity; - let refcell = self.sender.inner.get().unwrap(); - let mut producer = refcell.0.borrow_mut_or_yield(); - - let key_length = key.len().try_into().unwrap(); - let total_length = EntryHeader::delete_key_size(key_length); - if total_length > capacity { - unreachable!("entry larger that the bbqueue capacity"); - } - - let payload_header = EntryHeader::DbOperation { database: D::DATABASE, key_length: None }; - - loop { - let mut grant = match producer.grant(total_length) { - Ok(grant) => grant, - Err(bbqueue::Error::InsufficientSize) => continue, - Err(e) => unreachable!("{e:?}"), - }; - - let (header, remaining) = grant.split_at_mut(mem::size_of::()); - header.copy_from_slice(payload_header.bytes_of()); - remaining.copy_from_slice(key); - - // We could commit only the used memory. - grant.commit(total_length); - - break Ok(()); - } + self.sender.delete_entry(D::DATABASE, key) } } @@ -430,13 +627,10 @@ pub struct FacetDocidsSender<'a, 'b> { impl FacetDocidsSender<'_, '_> { pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> { - let capacity = self.sender.capacity; - let refcell = self.sender.inner.get().unwrap(); - let mut producer = refcell.0.borrow_mut_or_yield(); - let (facet_kind, key) = FacetKind::extract_from_key(key); - let key_length = key.len().try_into().unwrap(); + let database = Database::from(facet_kind); + let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); let value_length = CboRoaringBitmapCodec::serialized_size(bitmap); let value_length = match facet_kind { // We must take the facet group size into account @@ -445,26 +639,8 @@ impl FacetDocidsSender<'_, '_> { FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_length, }; - let total_length = EntryHeader::put_key_value_size(key_length, value_length); - if total_length > capacity { - unreachable!("entry larger that the bbqueue capacity"); - } - - let payload_header = EntryHeader::DbOperation { - database: Database::from(facet_kind), - key_length: NonZeroU16::new(key_length), - }; - - loop { - let mut grant = match producer.grant(total_length) { - Ok(grant) => grant, - Err(bbqueue::Error::InsufficientSize) => continue, - Err(e) => unreachable!("{e:?}"), - }; - - let (header, remaining) = grant.split_at_mut(mem::size_of::()); - header.copy_from_slice(payload_header.bytes_of()); - let (key_out, value_out) = remaining.split_at_mut(key.len()); + self.sender.write_key_value_with(database, key_length, value_length, |buffer| { + let (key_out, value_out) = buffer.split_at_mut(key.len()); key_out.copy_from_slice(key); let value_out = match facet_kind { @@ -477,47 +653,17 @@ impl FacetDocidsSender<'_, '_> { } FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out, }; + CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?; - // We could commit only the used memory. 
- grant.commit(total_length); - - break Ok(()); - } + Ok(()) + }) } pub fn delete(&self, key: &[u8]) -> crate::Result<()> { - let capacity = self.sender.capacity; - let refcell = self.sender.inner.get().unwrap(); - let mut producer = refcell.0.borrow_mut_or_yield(); - let (facet_kind, key) = FacetKind::extract_from_key(key); - let key_length = key.len().try_into().unwrap(); - - let total_length = EntryHeader::delete_key_size(key_length); - if total_length > capacity { - unreachable!("entry larger that the bbqueue capacity"); - } - - let payload_header = - EntryHeader::DbOperation { database: Database::from(facet_kind), key_length: None }; - - loop { - let mut grant = match producer.grant(total_length) { - Ok(grant) => grant, - Err(bbqueue::Error::InsufficientSize) => continue, - Err(e) => unreachable!("{e:?}"), - }; - - let (header, remaining) = grant.split_at_mut(mem::size_of::()); - header.copy_from_slice(payload_header.bytes_of()); - remaining.copy_from_slice(key); - - // We could commit only the used memory. - grant.commit(total_length); - - break Ok(()); - } + let database = Database::from(facet_kind); + self.sender.delete_entry(database, key) } } @@ -565,7 +711,7 @@ impl DocumentsSender<'_, '_> { pub fn delete(&self, docid: DocumentId, external_id: String) -> crate::Result<()> { self.0.delete_entry(Database::Documents, &docid.to_be_bytes())?; - self.0.send_delete_vector(docid)?; + self.0.delete_vector(docid)?; self.0.delete_entry(Database::ExternalDocumentsIds, external_id.as_bytes()) } } @@ -579,13 +725,10 @@ impl EmbeddingSender<'_, '_> { embedder_id: u8, embeddings: Vec, ) -> crate::Result<()> { - self.0 - .send(WriterOperation::ArroyOperation(ArroyOperation::SetVectors { - docid, - embedder_id, - embeddings, - })) - .map_err(|_| SendError(())) + for embedding in embeddings { + self.set_vector(docid, embedder_id, embedding)?; + } + Ok(()) } pub fn set_vector( @@ -593,21 +736,8 @@ impl EmbeddingSender<'_, '_> { docid: DocumentId, embedder_id: u8, embedding: Embedding, - ) -> StdResult<(), SendError<()>> { - self.0 - .send(WriterOperation::ArroyOperation(ArroyOperation::SetVector { - docid, - embedder_id, - embedding, - })) - .map_err(|_| SendError(())) - } - - /// Marks all embedders as "to be built" - pub fn finish(self, configs: Vec) -> StdResult<(), SendError<()>> { - self.0 - .send(WriterOperation::ArroyOperation(ArroyOperation::Finish { configs })) - .map_err(|_| SendError(())) + ) -> crate::Result<()> { + self.0.set_vector(docid, embedder_id, &embedding[..]) } } diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index 52b13f37d..42278d443 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -76,7 +76,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> { context.data, &self.possible_embedding_mistakes, self.threads, - self.sender, + &self.sender, &context.doc_alloc, )) } diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index f82f4af37..1fd60b610 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -40,7 +40,7 @@ use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids; use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases}; use crate::update::settings::InnerIndexSettings; use crate::update::{FacetsUpdateBulk, GrenadParameters}; -use 
crate::vector::{ArroyWrapper, EmbeddingConfigs, Embeddings}; +use crate::vector::{ArroyWrapper, EmbeddingConfigs}; use crate::{ Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder, UserError, @@ -80,7 +80,7 @@ where let bbbuffers: Vec<_> = (0..rayon::current_num_threads()) .map(|_| bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread .collect(); - let (extractor_sender, writer_receiver) = extractor_writer_bbqueue(&bbbuffers); + let (extractor_sender, writer_receiver) = extractor_writer_bbqueue(&bbbuffers, 1000); let finished_extraction = AtomicBool::new(false); let metadata_builder = MetadataBuilder::from_index(index, wtxn)?; @@ -386,7 +386,11 @@ where }) .collect(); + // Used by by the ArroySetVector to copy the embedding into an + // aligned memory area, required by arroy to accept a new vector. + let mut aligned_embedding = Vec::new(); let mut arroy_writers = arroy_writers?; + { let span = tracing::trace_span!(target: "indexing::write_db", "all"); let _entered = span.enter(); @@ -394,81 +398,85 @@ where let span = tracing::trace_span!(target: "indexing::write_db", "post_merge"); let mut _entered_post_merge = None; - for operation in writer_receiver { + while let Some(action) = writer_receiver.recv() { if _entered_post_merge.is_none() && finished_extraction.load(std::sync::atomic::Ordering::Relaxed) { _entered_post_merge = Some(span.enter()); } - match operation { - WriterOperation::DbOperation(db_operation) => { - let database = db_operation.database(index); - let database_name = db_operation.database_name(); - match db_operation.entry() { - EntryOperation::Delete(e) => match database.delete(wtxn, e.entry()) { - Ok(false) => unreachable!("We tried to delete an unknown key"), - Ok(_) => (), - Err(error) => { - return Err(Error::InternalError( - InternalError::StoreDeletion { - database_name, - key: e.entry().to_owned(), - error, - }, - )); - } - }, - EntryOperation::Write(e) => { - if let Err(error) = database.put(wtxn, e.key(), e.value()) { - return Err(Error::InternalError(InternalError::StorePut { - database_name, - key: e.key().to_owned(), - value_length: e.value().len(), - error, - })); - } - } + + match action { + ReceiverAction::WakeUp => (), + ReceiverAction::LargeEntry { database, key, value } => { + let database_name = database.database_name(); + let database = database.database(index); + if let Err(error) = database.put(wtxn, &key, &value) { + return Err(Error::InternalError(InternalError::StorePut { + database_name, + key, + value_length: value.len(), + error, + })); } } - WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation { - ArroyOperation::DeleteVectors { docid } => { - for ( - _embedder_index, - (_embedder_name, _embedder, writer, dimensions), - ) in &mut arroy_writers - { + } + + while let Some(frame_with_header) = writer_receiver.read() { + match frame_with_header.header() { + EntryHeader::DbOperation(operation) => { + let database_name = operation.database.database_name(); + let database = operation.database.database(index); + let frame = frame_with_header.frame(); + match operation.key_value(frame) { + (key, Some(value)) => { + if let Err(error) = database.put(wtxn, key, value) { + return Err(Error::InternalError(InternalError::StorePut { + database_name, + key: key.into(), + value_length: value.len(), + error, + })); + } + } + (key, None) => match database.delete(wtxn, key) { + Ok(false) => { + unreachable!("We tried to delete an unknown key: {key:?}") + } + Ok(_) => (), 
+ Err(error) => { + return Err(Error::InternalError( + InternalError::StoreDeletion { + database_name, + key: key.into(), + error, + }, + )); + } + }, + } + } + EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => { + for (_index, (_name, _embedder, writer, dimensions)) in &mut arroy_writers { let dimensions = *dimensions; writer.del_items(wtxn, dimensions, docid)?; } } - ArroyOperation::SetVectors { - docid, - embedder_id, - embeddings: raw_embeddings, - } => { - let (_, _, writer, dimensions) = arroy_writers - .get(&embedder_id) - .expect("requested a missing embedder"); - - let mut embeddings = Embeddings::new(*dimensions); - for embedding in raw_embeddings { - embeddings.append(embedding).unwrap(); - } - - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_items(wtxn, docid, &embeddings)?; + EntryHeader::ArroySetVector(asv) => { + let ArroySetVector { docid, embedder_id, .. } = asv; + let frame = frame_with_header.frame(); + let embedding = asv.read_embedding_into_vec(frame, &mut aligned_embedding); + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_item(wtxn, docid, embedding)?; + } } - ArroyOperation::SetVector { docid, embedder_id, embedding } => { - let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_item(wtxn, docid, &embedding)?; - } - _otherwise => unreachable!(), - }, + } } } + todo!("read the BBQueue once the channel is closed"); + 'vectors: { let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build"); From 70802eb7c72473fb5cb8a1b0258a9a6ab88b81f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 13:45:47 +0100 Subject: [PATCH 026/158] Fix most issues with the lifetimes --- crates/milli/src/update/new/channel.rs | 7 ++++++ .../new/extract/faceted/extract_facets.rs | 6 ++--- .../src/update/new/extract/vectors/mod.rs | 22 +++++++++---------- crates/milli/src/update/new/indexer/mod.rs | 6 ++--- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index d2681c915..d1d64814e 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -93,6 +93,7 @@ pub struct WriterBbqueueReceiver<'a> { } /// The action to perform on the receiver/writer side. +#[derive(Debug)] pub enum ReceiverAction { /// Wake up, you have frames to read for the BBQueue buffers. 
WakeUp, @@ -599,6 +600,7 @@ impl DatabaseType for WordPositionDocids { const DATABASE: Database = Database::WordPositionDocids; } +#[derive(Clone, Copy)] pub struct WordDocidsSender<'a, 'b, D> { sender: &'a ExtractorBbqueueSender<'b>, _marker: PhantomData, @@ -621,6 +623,7 @@ impl WordDocidsSender<'_, '_, D> { } } +#[derive(Clone, Copy)] pub struct FacetDocidsSender<'a, 'b> { sender: &'a ExtractorBbqueueSender<'b>, } @@ -667,6 +670,7 @@ impl FacetDocidsSender<'_, '_> { } } +#[derive(Clone, Copy)] pub struct FieldIdDocidFacetSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); impl FieldIdDocidFacetSender<'_, '_> { @@ -691,6 +695,7 @@ impl FieldIdDocidFacetSender<'_, '_> { } } +#[derive(Clone, Copy)] pub struct DocumentsSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); impl DocumentsSender<'_, '_> { @@ -716,6 +721,7 @@ impl DocumentsSender<'_, '_> { } } +#[derive(Clone, Copy)] pub struct EmbeddingSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); impl EmbeddingSender<'_, '_> { @@ -741,6 +747,7 @@ impl EmbeddingSender<'_, '_> { } } +#[derive(Clone, Copy)] pub struct GeoSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); impl GeoSender<'_, '_> { diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 9ad37d52c..490dada65 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -25,14 +25,14 @@ use crate::update::new::DocumentChange; use crate::update::GrenadParameters; use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH}; -pub struct FacetedExtractorData<'a> { +pub struct FacetedExtractorData<'a, 'b> { attributes_to_extract: &'a [&'a str], - sender: &'a FieldIdDocidFacetSender<'a>, + sender: &'a FieldIdDocidFacetSender<'a, 'b>, grenad_parameters: GrenadParameters, buckets: usize, } -impl<'a, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a> { +impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> { type Data = RefCell>; fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index 42278d443..1110432fa 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -18,17 +18,17 @@ use crate::vector::error::{ use crate::vector::{Embedder, Embedding, EmbeddingConfigs}; use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError}; -pub struct EmbeddingExtractor<'a> { +pub struct EmbeddingExtractor<'a, 'b> { embedders: &'a EmbeddingConfigs, - sender: EmbeddingSender<'a>, + sender: EmbeddingSender<'a, 'b>, possible_embedding_mistakes: PossibleEmbeddingMistakes, threads: &'a ThreadPoolNoAbort, } -impl<'a> EmbeddingExtractor<'a> { +impl<'a, 'b> EmbeddingExtractor<'a, 'b> { pub fn new( embedders: &'a EmbeddingConfigs, - sender: EmbeddingSender<'a>, + sender: EmbeddingSender<'a, 'b>, field_distribution: &'a FieldDistribution, threads: &'a ThreadPoolNoAbort, ) -> Self { @@ -43,7 +43,7 @@ pub struct EmbeddingExtractorData<'extractor>( unsafe impl MostlySend for EmbeddingExtractorData<'_> {} -impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> { +impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> { type Data = RefCell>; fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result { @@ -76,7 +76,7 @@ 
impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> { context.data, &self.possible_embedding_mistakes, self.threads, - &self.sender, + self.sender, &context.doc_alloc, )) } @@ -259,7 +259,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> { // Currently this is the case as: // 1. BVec are inside of the bumaplo // 2. All other fields are either trivial (u8) or references. -struct Chunks<'a, 'extractor> { +struct Chunks<'a, 'b, 'extractor> { texts: BVec<'a, &'a str>, ids: BVec<'a, DocumentId>, @@ -270,11 +270,11 @@ struct Chunks<'a, 'extractor> { possible_embedding_mistakes: &'a PossibleEmbeddingMistakes, user_provided: &'a RefCell>, threads: &'a ThreadPoolNoAbort, - sender: &'a EmbeddingSender<'a>, + sender: EmbeddingSender<'a, 'b>, has_manual_generation: Option<&'a str>, } -impl<'a, 'extractor> Chunks<'a, 'extractor> { +impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> { #[allow(clippy::too_many_arguments)] pub fn new( embedder: &'a Embedder, @@ -284,7 +284,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> { user_provided: &'a RefCell>, possible_embedding_mistakes: &'a PossibleEmbeddingMistakes, threads: &'a ThreadPoolNoAbort, - sender: &'a EmbeddingSender<'a>, + sender: EmbeddingSender<'a, 'b>, doc_alloc: &'a Bump, ) -> Self { let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint(); @@ -368,7 +368,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> { possible_embedding_mistakes: &PossibleEmbeddingMistakes, unused_vectors_distribution: &UnusedVectorsDistributionBump, threads: &ThreadPoolNoAbort, - sender: EmbeddingSender<'a>, + sender: EmbeddingSender<'a, 'b>, has_manual_generation: Option<&'a str>, ) -> Result<()> { if let Some(external_docid) = has_manual_generation { diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 1fd60b610..982868d93 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -80,7 +80,7 @@ where let bbbuffers: Vec<_> = (0..rayon::current_num_threads()) .map(|_| bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread .collect(); - let (extractor_sender, writer_receiver) = extractor_writer_bbqueue(&bbbuffers, 1000); + let (extractor_sender, mut writer_receiver) = extractor_writer_bbqueue(&bbbuffers, 1000); let finished_extraction = AtomicBool::new(false); let metadata_builder = MetadataBuilder::from_index(index, wtxn)?; @@ -302,7 +302,7 @@ where } let embedding_sender = extractor_sender.embeddings(); - let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads()); + let extractor = EmbeddingExtractor::new(embedders, embedding_sender, field_distribution, request_threads()); let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); { let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); @@ -363,7 +363,6 @@ where let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); let vector_arroy = index.vector_arroy; - let mut rng = rand::rngs::StdRng::seed_from_u64(42); let indexer_span = tracing::Span::current(); let arroy_writers: Result> = embedders .inner_as_ref() @@ -490,6 +489,7 @@ where Step::WritingEmbeddingsToDatabase, )); + let mut rng = rand::rngs::StdRng::seed_from_u64(42); for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers { let dimensions = *dimensions; writer.build_and_quantize( From 08d641336588a51bf7ada203828ba2b9c19123bf Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 13:46:41 +0100 Subject: [PATCH 027/158] Fix result types --- crates/milli/src/update/new/extract/faceted/extract_facets.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 490dada65..f2132ce38 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -318,7 +318,7 @@ impl<'doc> DelAddFacetValue<'doc> { docid: DocumentId, sender: &FieldIdDocidFacetSender, doc_alloc: &Bump, - ) -> std::result::Result<(), crossbeam_channel::SendError<()>> { + ) -> crate::Result<()> { let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc); for ((fid, value), deladd) in self.strings { if let Ok(s) = std::str::from_utf8(&value) { From acec45ad7c3414db493132fd37fdd951b61529b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 13:59:29 +0100 Subject: [PATCH 028/158] Send a WakeUp when writing data in the BBQueue buffers --- crates/milli/src/update/new/channel.rs | 24 ++++ crates/milli/src/update/new/indexer/mod.rs | 136 +++++++++++++-------- 2 files changed, 107 insertions(+), 53 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index d1d64814e..0a6d37943 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -422,6 +422,12 @@ impl<'b> ExtractorBbqueueSender<'b> { // We could commit only the used memory. grant.commit(total_length); + // We only send a wake up message when the channel is empty + // so that we don't fill the channel with too many WakeUps. + if self.sender.is_empty() { + self.sender.send(ReceiverAction::WakeUp).unwrap(); + } + Ok(()) } @@ -460,6 +466,12 @@ impl<'b> ExtractorBbqueueSender<'b> { // We could commit only the used memory. grant.commit(total_length); + // We only send a wake up message when the channel is empty + // so that we don't fill the channel with too many WakeUps. + if self.sender.is_empty() { + self.sender.send(ReceiverAction::WakeUp).unwrap(); + } + Ok(()) } @@ -511,6 +523,12 @@ impl<'b> ExtractorBbqueueSender<'b> { // We could commit only the used memory. grant.commit(total_length); + // We only send a wake up message when the channel is empty + // so that we don't fill the channel with too many WakeUps. + if self.sender.is_empty() { + self.sender.send(ReceiverAction::WakeUp).unwrap(); + } + Ok(()) } @@ -561,6 +579,12 @@ impl<'b> ExtractorBbqueueSender<'b> { // We could commit only the used memory. grant.commit(total_length); + // We only send a wake up message when the channel is empty + // so that we don't fill the channel with too many WakeUps. 
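+        // A skipped wake-up is harmless: frames are drained on the next message,
+        // and the indexer drains whatever remains once this channel is closed.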
+ if self.sender.is_empty() { + self.sender.send(ReceiverAction::WakeUp).unwrap(); + } + Ok(()) } } diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 982868d93..835ee240b 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -420,61 +420,27 @@ where } } - while let Some(frame_with_header) = writer_receiver.read() { - match frame_with_header.header() { - EntryHeader::DbOperation(operation) => { - let database_name = operation.database.database_name(); - let database = operation.database.database(index); - let frame = frame_with_header.frame(); - match operation.key_value(frame) { - (key, Some(value)) => { - if let Err(error) = database.put(wtxn, key, value) { - return Err(Error::InternalError(InternalError::StorePut { - database_name, - key: key.into(), - value_length: value.len(), - error, - })); - } - } - (key, None) => match database.delete(wtxn, key) { - Ok(false) => { - unreachable!("We tried to delete an unknown key: {key:?}") - } - Ok(_) => (), - Err(error) => { - return Err(Error::InternalError( - InternalError::StoreDeletion { - database_name, - key: key.into(), - error, - }, - )); - } - }, - } - } - EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => { - for (_index, (_name, _embedder, writer, dimensions)) in &mut arroy_writers { - let dimensions = *dimensions; - writer.del_items(wtxn, dimensions, docid)?; - } - } - EntryHeader::ArroySetVector(asv) => { - let ArroySetVector { docid, embedder_id, .. } = asv; - let frame = frame_with_header.frame(); - let embedding = asv.read_embedding_into_vec(frame, &mut aligned_embedding); - let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_item(wtxn, docid, embedding)?; - } - } - } + // Every time the is a message in the channel we search + // for new entries in the BBQueue buffers. + write_from_bbqueue( + &mut writer_receiver, + index, + wtxn, + &arroy_writers, + &mut aligned_embedding, + )?; } - } - todo!("read the BBQueue once the channel is closed"); + // Once the extractor/writer channel is closed + // we must process the remaining BBQueue messages. + write_from_bbqueue( + &mut writer_receiver, + index, + wtxn, + &arroy_writers, + &mut aligned_embedding, + )?; + } 'vectors: { let span = @@ -548,6 +514,70 @@ where Ok(()) } +/// A function dedicated to manage all the available BBQueue frames. +/// +/// It reads all the available frames, do the corresponding database operations +/// and stops when no frame are available. 
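+/// It is called after every message received on the extractor channel, and once
+/// more after the channel is closed, to drain the remaining frames.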
+fn write_from_bbqueue( + writer_receiver: &mut WriterBbqueueReceiver<'_>, + index: &Index, + wtxn: &mut RwTxn<'_>, + arroy_writers: &HashMap, + aligned_embedding: &mut Vec, +) -> crate::Result<()> { + while let Some(frame_with_header) = writer_receiver.read() { + match frame_with_header.header() { + EntryHeader::DbOperation(operation) => { + let database_name = operation.database.database_name(); + let database = operation.database.database(index); + let frame = frame_with_header.frame(); + match operation.key_value(frame) { + (key, Some(value)) => { + if let Err(error) = database.put(wtxn, key, value) { + return Err(Error::InternalError(InternalError::StorePut { + database_name, + key: key.into(), + value_length: value.len(), + error, + })); + } + } + (key, None) => match database.delete(wtxn, key) { + Ok(false) => { + unreachable!("We tried to delete an unknown key: {key:?}") + } + Ok(_) => (), + Err(error) => { + return Err(Error::InternalError(InternalError::StoreDeletion { + database_name, + key: key.into(), + error, + })); + } + }, + } + } + EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => { + for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers { + let dimensions = *dimensions; + writer.del_items(wtxn, dimensions, docid)?; + } + } + EntryHeader::ArroySetVector(asv) => { + let ArroySetVector { docid, embedder_id, .. } = asv; + let frame = frame_with_header.frame(); + let embedding = asv.read_embedding_into_vec(frame, aligned_embedding); + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_item(wtxn, docid, embedding)?; + } + } + } + + Ok(()) +} + #[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")] fn compute_prefix_database( index: &Index, From cc63802115d864ce169a3f86cf669ed356f8167d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 14:58:03 +0100 Subject: [PATCH 029/158] Modify and return the IndexEmbeddings to write them later --- crates/milli/src/update/new/indexer/mod.rs | 25 +++++++++++----------- crates/milli/src/update/new/steps.rs | 4 ++-- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 835ee240b..89c1b850d 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -117,7 +117,6 @@ where let rtxn = index.read_txn()?; - // document but we need to create a function that collects and compresses documents. 
let document_sender = extractor_sender.documents(); let document_extractor = DocumentsExtractor::new(document_sender, embedders); @@ -180,10 +179,6 @@ where } { - - - - let WordDocidsCaches { word_docids, word_fid_docids, @@ -296,7 +291,6 @@ where } 'vectors: { - if index_embeddings.is_empty() { break 'vectors; } @@ -308,7 +302,14 @@ where let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); let _entered = span.enter(); - extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?; + extract( + document_changes, + &extractor, + indexing_context, + &mut extractor_allocs, + &datastore, + Step::ExtractingEmbeddings, + )?; } { let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors"); @@ -357,7 +358,7 @@ where finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); - Result::Ok(facet_field_ids_delta) + Result::Ok((facet_field_ids_delta, index_embeddings)) })?; let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); @@ -442,6 +443,10 @@ where )?; } + (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); + + let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?; + 'vectors: { let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build"); @@ -470,10 +475,6 @@ where index.put_embedding_configs(wtxn, index_embeddings)?; } - (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); - - let facet_field_ids_delta = extractor_handle.join().unwrap()?; - (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets)); if index.facet_search(wtxn)? { diff --git a/crates/milli/src/update/new/steps.rs b/crates/milli/src/update/new/steps.rs index 7c2441933..bee1be260 100644 --- a/crates/milli/src/update/new/steps.rs +++ b/crates/milli/src/update/new/steps.rs @@ -11,8 +11,8 @@ pub enum Step { ExtractingEmbeddings, WritingGeoPoints, WritingToDatabase, - WritingEmbeddingsToDatabase, WaitingForExtractors, + WritingEmbeddingsToDatabase, PostProcessingFacets, PostProcessingWords, Finalizing, @@ -29,8 +29,8 @@ impl Step { Step::ExtractingEmbeddings => "extracting embeddings", Step::WritingGeoPoints => "writing geo points", Step::WritingToDatabase => "writing to database", - Step::WritingEmbeddingsToDatabase => "writing embeddings to database", Step::WaitingForExtractors => "waiting for extractors", + Step::WritingEmbeddingsToDatabase => "writing embeddings to database", Step::PostProcessingFacets => "post-processing facets", Step::PostProcessingWords => "post-processing words", Step::Finalizing => "finalizing", From a514ce472acfb6bbe329f01ad3be27f0c487bb20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 14:59:04 +0100 Subject: [PATCH 030/158] Make clippy happy --- crates/milli/src/update/new/channel.rs | 8 ++++---- crates/milli/src/update/new/merger.rs | 7 ------- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 0a6d37943..fc05baa89 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -382,19 +382,19 @@ impl<'b> ExtractorBbqueueSender<'b> { } pub fn field_id_docid_facet_sender<'a>(&'a self) -> FieldIdDocidFacetSender<'a, 'b> { - FieldIdDocidFacetSender(&self) + FieldIdDocidFacetSender(self) } pub fn documents<'a>(&'a self) -> DocumentsSender<'a, 'b> { - 
DocumentsSender(&self) + DocumentsSender(self) } pub fn embeddings<'a>(&'a self) -> EmbeddingSender<'a, 'b> { - EmbeddingSender(&self) + EmbeddingSender(self) } pub fn geo<'a>(&'a self) -> GeoSender<'a, 'b> { - GeoSender(&self) + GeoSender(self) } fn delete_vector(&self, docid: DocumentId) -> crate::Result<()> { diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index f2809b376..f8af84177 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -249,10 +249,3 @@ fn merge_cbo_bitmaps( } } } - -/// TODO Return the slice directly from the serialize_into method -fn cbo_bitmap_serialize_into_vec<'b>(bitmap: &RoaringBitmap, buffer: &'b mut Vec) -> &'b [u8] { - buffer.clear(); - CboRoaringBitmapCodec::serialize_into(bitmap, buffer); - buffer.as_slice() -} From 98d4a2909e85c8ec5ba1a6dd4b2a6b2d63cf42c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 16:05:44 +0100 Subject: [PATCH 031/158] Fix the way we spawn the rayon threadpool --- crates/index-scheduler/src/batch.rs | 110 +++--- crates/milli/src/update/new/channel.rs | 36 +- crates/milli/src/update/new/indexer/mod.rs | 440 +++++++++++---------- 3 files changed, 313 insertions(+), 273 deletions(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index 04cdb912f..bec1fedf5 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -1351,7 +1351,10 @@ impl IndexScheduler { let pool = match &indexer_config.thread_pool { Some(pool) => pool, None => { - local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap(); + local_pool = ThreadPoolNoAbortBuilder::new() + .thread_name(|i| format!("indexing-thread-{i}")) + .build() + .unwrap(); &local_pool } }; @@ -1399,21 +1402,19 @@ impl IndexScheduler { } if tasks.iter().any(|res| res.error.is_none()) { - pool.install(|| { - indexer::index( - index_wtxn, - index, - indexer_config.grenad_parameters(), - &db_fields_ids_map, - new_fields_ids_map, - primary_key, - &document_changes, - embedders, - &|| must_stop_processing.get(), - &send_progress, - ) - }) - .unwrap()?; + indexer::index( + index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + primary_key, + &document_changes, + embedders, + &|| must_stop_processing.get(), + &send_progress, + )?; tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); } @@ -1489,34 +1490,34 @@ impl IndexScheduler { let pool = match &indexer_config.thread_pool { Some(pool) => pool, None => { - local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap(); + local_pool = ThreadPoolNoAbortBuilder::new() + .thread_name(|i| format!("indexing-thread-{i}")) + .build() + .unwrap(); &local_pool } }; - pool.install(|| { - let indexer = - UpdateByFunction::new(candidates, context.clone(), code.clone()); - let document_changes = indexer.into_changes(&primary_key)?; - let embedders = index.embedding_configs(index_wtxn)?; - let embedders = self.embedders(embedders)?; + let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone()); + let document_changes = + pool.install(|| indexer.into_changes(&primary_key)).unwrap()?; - indexer::index( - index_wtxn, - index, - indexer_config.grenad_parameters(), - &db_fields_ids_map, - new_fields_ids_map, - None, // cannot change primary key in DocumentEdition - &document_changes, - embedders, - &|| must_stop_processing.get(), - 
&send_progress, - )?; + let embedders = index.embedding_configs(index_wtxn)?; + let embedders = self.embedders(embedders)?; - Result::Ok(()) - }) - .unwrap()?; + indexer::index( + index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + None, // cannot change primary key in DocumentEdition + &document_changes, + embedders, + &|| must_stop_processing.get(), + &send_progress, + )?; // tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); } @@ -1641,7 +1642,10 @@ impl IndexScheduler { let pool = match &indexer_config.thread_pool { Some(pool) => pool, None => { - local_pool = ThreadPoolNoAbortBuilder::new().build().unwrap(); + local_pool = ThreadPoolNoAbortBuilder::new() + .thread_name(|i| format!("indexing-thread-{i}")) + .build() + .unwrap(); &local_pool } }; @@ -1652,21 +1656,19 @@ impl IndexScheduler { let embedders = index.embedding_configs(index_wtxn)?; let embedders = self.embedders(embedders)?; - pool.install(|| { - indexer::index( - index_wtxn, - index, - indexer_config.grenad_parameters(), - &db_fields_ids_map, - new_fields_ids_map, - None, // document deletion never changes primary key - &document_changes, - embedders, - &|| must_stop_processing.get(), - &send_progress, - ) - }) - .unwrap()?; + indexer::index( + index_wtxn, + index, + pool, + indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + None, // document deletion never changes primary key + &document_changes, + embedders, + &|| must_stop_processing.get(), + &send_progress, + )?; // tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); } diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index fc05baa89..beba80ac8 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -55,6 +55,12 @@ pub fn extractor_writer_bbqueue( let producers = ThreadLocal::with_capacity(bbbuffers.len()); let consumers = rayon::broadcast(|bi| { + eprintln!( + "hello thread #{:?} (#{:?}, #{:?})", + bi.index(), + std::thread::current().name(), + std::thread::current().id(), + ); let bbqueue = &bbbuffers[bi.index()]; let (producer, consumer) = bbqueue.try_split_framed().unwrap(); producers.get_or(|| FullySend(RefCell::new(producer))); @@ -399,7 +405,15 @@ impl<'b> ExtractorBbqueueSender<'b> { fn delete_vector(&self, docid: DocumentId) -> crate::Result<()> { let capacity = self.capacity; - let refcell = self.producers.get().unwrap(); + let refcell = match self.producers.get() { + Some(refcell) => refcell, + None => panic!( + "hello thread #{:?} (#{:?}, #{:?})", + rayon::current_thread_index(), + std::thread::current().name(), + std::thread::current().id() + ), + }; let mut producer = refcell.0.borrow_mut_or_yield(); let payload_header = EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }); @@ -438,7 +452,15 @@ impl<'b> ExtractorBbqueueSender<'b> { embedding: &[f32], ) -> crate::Result<()> { let capacity = self.capacity; - let refcell = self.producers.get().unwrap(); + let refcell = match self.producers.get() { + Some(refcell) => refcell, + None => panic!( + "hello thread #{:?} (#{:?}, #{:?})", + rayon::current_thread_index(), + std::thread::current().name(), + std::thread::current().id() + ), + }; let mut producer = refcell.0.borrow_mut_or_yield(); let payload_header = @@ -496,7 +518,15 @@ impl<'b> ExtractorBbqueueSender<'b> { F: FnOnce(&mut [u8]) -> 
crate::Result<()>, { let capacity = self.capacity; - let refcell = self.producers.get().unwrap(); + let refcell = match self.producers.get() { + Some(refcell) => refcell, + None => panic!( + "hello thread #{:?} (#{:?}, #{:?})", + rayon::current_thread_index(), + std::thread::current().name(), + std::thread::current().id() + ), + }; let mut producer = refcell.0.borrow_mut_or_yield(); let operation = DbOperation { database, key_length: Some(key_length) }; diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 89c1b850d..b7d5431b4 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -62,6 +62,7 @@ mod update_by_function; pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( wtxn: &mut RwTxn, index: &'index Index, + pool: &ThreadPoolNoAbort, grenad_parameters: GrenadParameters, db_fields_ids_map: &'indexer FieldsIdsMap, new_fields_ids_map: FieldsIdsMap, @@ -77,10 +78,15 @@ where SP: Fn(Progress) + Sync, { /// TODO restrict memory and remove this memory from the extractors bump allocators - let bbbuffers: Vec<_> = (0..rayon::current_num_threads()) - .map(|_| bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread - .collect(); - let (extractor_sender, mut writer_receiver) = extractor_writer_bbqueue(&bbbuffers, 1000); + let bbbuffers: Vec<_> = pool + .install(|| { + (0..rayon::current_num_threads()) + .map(|_| bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread + .collect() + }) + .unwrap(); + let (extractor_sender, mut writer_receiver) = + pool.install(|| extractor_writer_bbqueue(&bbbuffers, 1000)).unwrap(); let finished_extraction = AtomicBool::new(false); let metadata_builder = MetadataBuilder::from_index(index, wtxn)?; @@ -112,253 +118,255 @@ where let field_distribution = &mut field_distribution; let document_ids = &mut document_ids; let extractor_handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || { - let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract"); - let _entered = span.enter(); - - let rtxn = index.read_txn()?; - - // document but we need to create a function that collects and compresses documents. - let document_sender = extractor_sender.documents(); - let document_extractor = DocumentsExtractor::new(document_sender, embedders); - let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); - { - let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents"); + pool.install(move || { + let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract"); let _entered = span.enter(); - extract(document_changes, - &document_extractor, - indexing_context, - &mut extractor_allocs, - &datastore, - Step::ExtractingDocuments, - )?; - } - { - let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents"); - let _entered = span.enter(); - for document_extractor_data in datastore { - let document_extractor_data = document_extractor_data.0.into_inner(); - for (field, delta) in document_extractor_data.field_distribution_delta { - let current = field_distribution.entry(field).or_default(); - // adding the delta should never cause a negative result, as we are removing fields that previously existed. - *current = current.saturating_add_signed(delta); + + let rtxn = index.read_txn()?; + + // document but we need to create a function that collects and compresses documents. 
+ let document_sender = extractor_sender.documents(); + let document_extractor = DocumentsExtractor::new(document_sender, embedders); + let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); + { + let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents"); + let _entered = span.enter(); + extract(document_changes, + &document_extractor, + indexing_context, + &mut extractor_allocs, + &datastore, + Step::ExtractingDocuments, + )?; + } + { + let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents"); + let _entered = span.enter(); + for document_extractor_data in datastore { + let document_extractor_data = document_extractor_data.0.into_inner(); + for (field, delta) in document_extractor_data.field_distribution_delta { + let current = field_distribution.entry(field).or_default(); + // adding the delta should never cause a negative result, as we are removing fields that previously existed. + *current = current.saturating_add_signed(delta); + } + document_extractor_data.docids_delta.apply_to(document_ids); } - document_extractor_data.docids_delta.apply_to(document_ids); + + field_distribution.retain(|_, v| *v != 0); } - field_distribution.retain(|_, v| *v != 0); - } + let facet_field_ids_delta; - let facet_field_ids_delta; + { + let caches = { + let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted"); + let _entered = span.enter(); - { - let caches = { - let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted"); - let _entered = span.enter(); + FacetedDocidsExtractor::run_extraction( + grenad_parameters, + document_changes, + indexing_context, + &mut extractor_allocs, + &extractor_sender.field_id_docid_facet_sender(), + Step::ExtractingFacets + )? + }; - FacetedDocidsExtractor::run_extraction( + { + let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted"); + let _entered = span.enter(); + + facet_field_ids_delta = merge_and_send_facet_docids( + caches, + FacetDatabases::new(index), + index, + extractor_sender.facet_docids(), + )?; + } + } + + { + let WordDocidsCaches { + word_docids, + word_fid_docids, + exact_word_docids, + word_position_docids, + fid_word_count_docids, + } = { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); + let _entered = span.enter(); + + WordDocidsExtractors::run_extraction( grenad_parameters, document_changes, indexing_context, &mut extractor_allocs, - &extractor_sender.field_id_docid_facet_sender(), - Step::ExtractingFacets + Step::ExtractingWords )? 
- }; + }; - { - let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted"); - let _entered = span.enter(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_docids, + index.word_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } - facet_field_ids_delta = merge_and_send_facet_docids( - caches, - FacetDatabases::new(index), - index, - extractor_sender.facet_docids(), - )?; - } - } + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_fid_docids, + index.word_fid_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } - { - let WordDocidsCaches { - word_docids, - word_fid_docids, - exact_word_docids, - word_position_docids, - fid_word_count_docids, - } = { - let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids"); - let _entered = span.enter(); + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); + let _entered = span.enter(); + merge_and_send_docids( + exact_word_docids, + index.exact_word_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } - WordDocidsExtractors::run_extraction( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - Step::ExtractingWords - )? - }; + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); + let _entered = span.enter(); + merge_and_send_docids( + word_position_docids, + index.word_position_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids"); - let _entered = span.enter(); - merge_and_send_docids( - word_docids, - index.word_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); + let _entered = span.enter(); + merge_and_send_docids( + fid_word_count_docids, + index.field_id_word_count_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } } - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids"); - let _entered = span.enter(); - merge_and_send_docids( - word_fid_docids, - index.word_fid_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; + // run the proximity extraction only if the precision is by word + // this works only if the settings didn't change during this transaction. + let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default(); + if proximity_precision == ProximityPrecision::ByWord { + let caches = { + let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); + let _entered = span.enter(); + + ::run_extraction( + grenad_parameters, + document_changes, + indexing_context, + &mut extractor_allocs, + Step::ExtractingWordProximity, + )? 
+ }; + + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids"); + let _entered = span.enter(); + + merge_and_send_docids( + caches, + index.word_pair_proximity_docids.remap_types(), + index, + extractor_sender.docids::(), + &indexing_context.must_stop_processing, + )?; + } } - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids"); - let _entered = span.enter(); - merge_and_send_docids( - exact_word_docids, - index.exact_word_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } + 'vectors: { + if index_embeddings.is_empty() { + break 'vectors; + } - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids"); - let _entered = span.enter(); - merge_and_send_docids( - word_position_docids, - index.word_position_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } + let embedding_sender = extractor_sender.embeddings(); + let extractor = EmbeddingExtractor::new(embedders, embedding_sender, field_distribution, request_threads()); + let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); + let _entered = span.enter(); - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids"); - let _entered = span.enter(); - merge_and_send_docids( - fid_word_count_docids, - index.field_id_word_count_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } - } + extract( + document_changes, + &extractor, + indexing_context, + &mut extractor_allocs, + &datastore, + Step::ExtractingEmbeddings, + )?; + } + { + let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors"); + let _entered = span.enter(); - // run the proximity extraction only if the precision is by word - // this works only if the settings didn't change during this transaction. - let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default(); - if proximity_precision == ProximityPrecision::ByWord { - let caches = { - let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); - let _entered = span.enter(); - - ::run_extraction( - grenad_parameters, - document_changes, - indexing_context, - &mut extractor_allocs, - Step::ExtractingWordProximity, - )? 
- }; - - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids"); - let _entered = span.enter(); - - merge_and_send_docids( - caches, - index.word_pair_proximity_docids.remap_types(), - index, - extractor_sender.docids::(), - &indexing_context.must_stop_processing, - )?; - } - } - - 'vectors: { - if index_embeddings.is_empty() { - break 'vectors; - } - - let embedding_sender = extractor_sender.embeddings(); - let extractor = EmbeddingExtractor::new(embedders, embedding_sender, field_distribution, request_threads()); - let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); - { - let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors"); - let _entered = span.enter(); - - extract( - document_changes, - &extractor, - indexing_context, - &mut extractor_allocs, - &datastore, - Step::ExtractingEmbeddings, - )?; - } - { - let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors"); - let _entered = span.enter(); - - for config in &mut index_embeddings { - 'data: for data in datastore.iter_mut() { - let data = &mut data.get_mut().0; - let Some(deladd) = data.remove(&config.name) else { continue 'data; }; - deladd.apply_to(&mut config.user_provided); + for config in &mut index_embeddings { + 'data: for data in datastore.iter_mut() { + let data = &mut data.get_mut().0; + let Some(deladd) = data.remove(&config.name) else { continue 'data; }; + deladd.apply_to(&mut config.user_provided); + } } } } - } - 'geo: { - let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else { - break 'geo; - }; - let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); + 'geo: { + let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? 
else { + break 'geo; + }; + let datastore = ThreadLocal::with_capacity(rayon::current_num_threads()); - { - let span = tracing::trace_span!(target: "indexing::documents::extract", "geo"); - let _entered = span.enter(); + { + let span = tracing::trace_span!(target: "indexing::documents::extract", "geo"); + let _entered = span.enter(); - extract( - document_changes, - &extractor, - indexing_context, - &mut extractor_allocs, - &datastore, - Step::WritingGeoPoints + extract( + document_changes, + &extractor, + indexing_context, + &mut extractor_allocs, + &datastore, + Step::WritingGeoPoints + )?; + } + + merge_and_send_rtree( + datastore, + &rtxn, + index, + extractor_sender.geo(), + &indexing_context.must_stop_processing, )?; } - merge_and_send_rtree( - datastore, - &rtxn, - index, - extractor_sender.geo(), - &indexing_context.must_stop_processing, - )?; - } + (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); - (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); + finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); - finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); - - Result::Ok((facet_field_ids_delta, index_embeddings)) + Result::Ok((facet_field_ids_delta, index_embeddings)) + }).unwrap() })?; let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map); From e83534a4305963c857423cf03c3612e4e31a2b07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 16:27:43 +0100 Subject: [PATCH 032/158] Fix the indexer::index to correctly use the rayon::ThreadPool --- crates/milli/src/update/new/channel.rs | 49 +++++----------------- crates/milli/src/update/new/indexer/mod.rs | 17 ++++---- 2 files changed, 19 insertions(+), 47 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index beba80ac8..70c4a6042 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -4,6 +4,7 @@ use std::mem; use std::num::NonZeroU16; use bbqueue::framed::{FrameGrantR, FrameProducer}; +use bbqueue::BBBuffer; use bytemuck::{checked, CheckedBitPattern, NoUninit}; use crossbeam_channel::SendError; use heed::types::Bytes; @@ -25,6 +26,9 @@ use crate::{CboRoaringBitmapCodec, DocumentId, Index}; /// Creates a tuple of senders/receiver to be used by /// the extractors and the writer loop. /// +/// The `bbqueue_capacity` represent the number of bytes allocated +/// to each BBQueue buffer and is not the sum of all of them. +/// /// The `channel_capacity` parameter defines the number of /// too-large-to-fit-in-BBQueue entries that can be sent through /// a crossbeam channel. This parameter must stay low to make @@ -40,14 +44,11 @@ use crate::{CboRoaringBitmapCodec, DocumentId, Index}; /// Panics if the number of provided BBQueues is not exactly equal /// to the number of available threads in the rayon threadpool. 
pub fn extractor_writer_bbqueue( - bbbuffers: &[bbqueue::BBBuffer], + bbbuffers: &mut Vec, + bbbuffer_capacity: usize, channel_capacity: usize, ) -> (ExtractorBbqueueSender, WriterBbqueueReceiver) { - assert_eq!( - bbbuffers.len(), - rayon::current_num_threads(), - "You must provide as many BBBuffer as the available number of threads to extract" - ); + bbbuffers.resize_with(rayon::current_num_threads(), || BBBuffer::new(bbbuffer_capacity)); let capacity = bbbuffers.first().unwrap().capacity(); // Read the field description to understand this @@ -55,12 +56,6 @@ pub fn extractor_writer_bbqueue( let producers = ThreadLocal::with_capacity(bbbuffers.len()); let consumers = rayon::broadcast(|bi| { - eprintln!( - "hello thread #{:?} (#{:?}, #{:?})", - bi.index(), - std::thread::current().name(), - std::thread::current().id(), - ); let bbqueue = &bbbuffers[bi.index()]; let (producer, consumer) = bbqueue.try_split_framed().unwrap(); producers.get_or(|| FullySend(RefCell::new(producer))); @@ -405,15 +400,7 @@ impl<'b> ExtractorBbqueueSender<'b> { fn delete_vector(&self, docid: DocumentId) -> crate::Result<()> { let capacity = self.capacity; - let refcell = match self.producers.get() { - Some(refcell) => refcell, - None => panic!( - "hello thread #{:?} (#{:?}, #{:?})", - rayon::current_thread_index(), - std::thread::current().name(), - std::thread::current().id() - ), - }; + let refcell = self.producers.get().unwrap(); let mut producer = refcell.0.borrow_mut_or_yield(); let payload_header = EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }); @@ -452,15 +439,7 @@ impl<'b> ExtractorBbqueueSender<'b> { embedding: &[f32], ) -> crate::Result<()> { let capacity = self.capacity; - let refcell = match self.producers.get() { - Some(refcell) => refcell, - None => panic!( - "hello thread #{:?} (#{:?}, #{:?})", - rayon::current_thread_index(), - std::thread::current().name(), - std::thread::current().id() - ), - }; + let refcell = self.producers.get().unwrap(); let mut producer = refcell.0.borrow_mut_or_yield(); let payload_header = @@ -518,15 +497,7 @@ impl<'b> ExtractorBbqueueSender<'b> { F: FnOnce(&mut [u8]) -> crate::Result<()>, { let capacity = self.capacity; - let refcell = match self.producers.get() { - Some(refcell) => refcell, - None => panic!( - "hello thread #{:?} (#{:?}, #{:?})", - rayon::current_thread_index(), - std::thread::current().name(), - std::thread::current().id() - ), - }; + let refcell = self.producers.get().unwrap(); let mut producer = refcell.0.borrow_mut_or_yield(); let operation = DbOperation { database, key_length: Some(key_length) }; diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index b7d5431b4..3a4406aef 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -77,17 +77,18 @@ where MSP: Fn() -> bool + Sync, SP: Fn(Progress) + Sync, { - /// TODO restrict memory and remove this memory from the extractors bump allocators - let bbbuffers: Vec<_> = pool + let mut bbbuffers = Vec::new(); + let finished_extraction = AtomicBool::new(false); + let (extractor_sender, mut writer_receiver) = pool .install(|| { - (0..rayon::current_num_threads()) - .map(|_| bbqueue::BBBuffer::new(100 * 1024 * 1024)) // 100 MiB by thread - .collect() + /// TODO restrict memory and remove this memory from the extractors bump allocators + extractor_writer_bbqueue( + &mut bbbuffers, + 100 * 1024 * 1024, // 100 MiB + 1000, + ) }) .unwrap(); - let (extractor_sender, mut writer_receiver) = - 
pool.install(|| extractor_writer_bbqueue(&bbbuffers, 1000)).unwrap(); - let finished_extraction = AtomicBool::new(false); let metadata_builder = MetadataBuilder::from_index(index, wtxn)?; let new_fields_ids_map = FieldIdMapWithMetadata::new(new_fields_ids_map, metadata_builder); From da650f834ee4fcb12d4a38a0e545f548bb06660f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 17:04:49 +0100 Subject: [PATCH 033/158] Plug the NoPanicThreadPool in the tests and benchmarks --- crates/benchmarks/benches/indexing.rs | 31 +++++++++++++++++++ crates/benchmarks/benches/utils.rs | 1 + crates/fuzzers/src/bin/fuzz-indexing.rs | 1 + crates/milli/src/index.rs | 3 ++ .../milli/src/search/new/tests/integration.rs | 1 + .../milli/src/update/index_documents/mod.rs | 11 +++++++ .../milli/tests/search/facet_distribution.rs | 1 + crates/milli/tests/search/mod.rs | 1 + crates/milli/tests/search/query_criteria.rs | 1 + crates/milli/tests/search/typo_tolerance.rs | 1 + 10 files changed, 52 insertions(+) diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index 2f33c3454..d3f307be3 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -157,6 +157,7 @@ fn indexing_songs_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -223,6 +224,7 @@ fn reindexing_songs_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -267,6 +269,7 @@ fn reindexing_songs_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -335,6 +338,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -411,6 +415,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -455,6 +460,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -495,6 +501,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -562,6 +569,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -628,6 +636,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -694,6 +703,7 @@ fn indexing_wiki(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + 
&milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -759,6 +769,7 @@ fn reindexing_wiki(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -803,6 +814,7 @@ fn reindexing_wiki(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -870,6 +882,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -946,6 +959,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -991,6 +1005,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -1032,6 +1047,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -1098,6 +1114,7 @@ fn indexing_movies_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -1163,6 +1180,7 @@ fn reindexing_movies_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -1207,6 +1225,7 @@ fn reindexing_movies_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -1274,6 +1293,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -1321,6 +1341,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec Index { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, diff --git a/crates/fuzzers/src/bin/fuzz-indexing.rs b/crates/fuzzers/src/bin/fuzz-indexing.rs index f335938b9..ee927940f 100644 --- a/crates/fuzzers/src/bin/fuzz-indexing.rs +++ b/crates/fuzzers/src/bin/fuzz-indexing.rs @@ -135,6 +135,7 @@ fn main() { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index fe83877a7..268d33cd9 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1821,6 +1821,7 @@ pub(crate) mod tests { indexer::index( wtxn, &self.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ 
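Each benchmark call site in this patch builds its no-abort pool inline; a small helper along the following lines (hypothetical, not part of the patch) would cut the repetition. It assumes ThreadPoolNoAbortBuilder::build() yields the ThreadPoolNoAbort that the new indexer::index(wtxn, index, pool, ...) parameter expects, and borrows the thread_name pattern from the scheduler change above:

fn bench_indexing_pool() -> milli::ThreadPoolNoAbort {
    // Name the workers so indexing threads are identifiable in traces.
    milli::ThreadPoolNoAbortBuilder::new()
        .thread_name(|i| format!("bench-indexing-thread-{i}"))
        .build()
        .unwrap()
}

// Call sites would then pass &bench_indexing_pool() instead of building the pool inline.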
-1911,6 +1912,7 @@ pub(crate) mod tests { indexer::index( wtxn, &self.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -1991,6 +1993,7 @@ pub(crate) mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index 79668b34b..5db5b400b 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -83,6 +83,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { indexer::index( &mut wtxn, &index, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 186cc501d..3988b311c 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -2155,6 +2155,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2216,6 +2217,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2268,6 +2270,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2319,6 +2322,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2372,6 +2376,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2430,6 +2435,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2481,6 +2487,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2532,6 +2539,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2725,6 +2733,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2783,6 +2792,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, @@ -2838,6 +2848,7 @@ mod tests { indexer::index( &mut wtxn, &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), indexer_config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, diff --git 
a/crates/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs index 61d0697ff..418cdc356 100644 --- a/crates/milli/tests/search/facet_distribution.rs +++ b/crates/milli/tests/search/facet_distribution.rs @@ -64,6 +64,7 @@ fn test_facet_distribution_with_no_facet_values() { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs index 1287b59d5..08b22d7b6 100644 --- a/crates/milli/tests/search/mod.rs +++ b/crates/milli/tests/search/mod.rs @@ -101,6 +101,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, diff --git a/crates/milli/tests/search/query_criteria.rs b/crates/milli/tests/search/query_criteria.rs index 3e56eeff0..8401f0444 100644 --- a/crates/milli/tests/search/query_criteria.rs +++ b/crates/milli/tests/search/query_criteria.rs @@ -333,6 +333,7 @@ fn criteria_ascdesc() { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, diff --git a/crates/milli/tests/search/typo_tolerance.rs b/crates/milli/tests/search/typo_tolerance.rs index 7ac9a1e4b..dbee296ee 100644 --- a/crates/milli/tests/search/typo_tolerance.rs +++ b/crates/milli/tests/search/typo_tolerance.rs @@ -142,6 +142,7 @@ fn test_typo_disabled_on_word() { indexer::index( &mut wtxn, &index, + &milli::ThreadPoolNoAbortBuilder::new().build().unwrap(), config.grenad_parameters(), &db_fields_ids_map, new_fields_ids_map, From 5c488e20cc07a66aff3794fd94c3c84d47170b31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 18:03:45 +0100 Subject: [PATCH 034/158] Send the geo rtree through crossbeam channel --- crates/milli/src/update/new/channel.rs | 107 +++++++++++++------------ 1 file changed, 56 insertions(+), 51 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 70c4a6042..26e375a5a 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -166,7 +166,6 @@ pub struct DbOperation { impl DbOperation { pub fn key_value<'a>(&self, frame: &'a FrameGrantR<'_>) -> (&'a [u8], Option<&'a [u8]>) { - /// TODO replace the return type by an enum Write | Delete let skip = EntryHeader::variant_size() + mem::size_of::(); match self.key_length { Some(key_length) => { @@ -478,8 +477,7 @@ impl<'b> ExtractorBbqueueSender<'b> { fn write_key_value(&self, database: Database, key: &[u8], value: &[u8]) -> crate::Result<()> { let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); - self.write_key_value_with(database, key_length, value.len(), |buffer| { - let (key_buffer, value_buffer) = buffer.split_at_mut(key.len()); + self.write_key_value_with(database, key_length, value.len(), |key_buffer, value_buffer| { key_buffer.copy_from_slice(key); value_buffer.copy_from_slice(value); Ok(()) @@ -494,7 +492,7 @@ impl<'b> ExtractorBbqueueSender<'b> { key_value_writer: F, ) -> crate::Result<()> where - F: FnOnce(&mut [u8]) -> crate::Result<()>, + F: FnOnce(&mut [u8], &mut [u8]) -> crate::Result<()>, { let capacity = self.capacity; let refcell = self.producers.get().unwrap(); @@ -519,7 +517,8 @@ 
impl<'b> ExtractorBbqueueSender<'b> { let header_size = payload_header.header_size(); let (header_bytes, remaining) = grant.split_at_mut(header_size); payload_header.serialize_into(header_bytes); - key_value_writer(remaining)?; + let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize); + key_value_writer(key_buffer, value_buffer)?; // We could commit only the used memory. grant.commit(total_length); @@ -635,12 +634,16 @@ impl WordDocidsSender<'_, '_, D> { pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> { let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); let value_length = CboRoaringBitmapCodec::serialized_size(bitmap); - self.sender.write_key_value_with(D::DATABASE, key_length, value_length, |buffer| { - let (key_buffer, value_buffer) = buffer.split_at_mut(key.len()); - key_buffer.copy_from_slice(key); - CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_buffer)?; - Ok(()) - }) + self.sender.write_key_value_with( + D::DATABASE, + key_length, + value_length, + |key_buffer, value_buffer| { + key_buffer.copy_from_slice(key); + CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_buffer)?; + Ok(()) + }, + ) } pub fn delete(&self, key: &[u8]) -> crate::Result<()> { @@ -667,25 +670,29 @@ impl FacetDocidsSender<'_, '_> { FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_length, }; - self.sender.write_key_value_with(database, key_length, value_length, |buffer| { - let (key_out, value_out) = buffer.split_at_mut(key.len()); - key_out.copy_from_slice(key); + self.sender.write_key_value_with( + database, + key_length, + value_length, + |key_out, value_out| { + key_out.copy_from_slice(key); - let value_out = match facet_kind { - // We must take the facet group size into account - // when we serialize strings and numbers. - FacetKind::String | FacetKind::Number => { - let (first, remaining) = value_out.split_first_mut().unwrap(); - *first = 1; - remaining - } - FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out, - }; + let value_out = match facet_kind { + // We must take the facet group size into account + // when we serialize strings and numbers. 
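The write_key_value_with change above moves the split_at_mut into the sender, so callers now receive the key and value buffers already separated. A simplified illustration of the new closure contract (the function name and error type here are placeholders, not the real API):

fn with_key_value_buffers<F>(frame: &mut [u8], key_len: usize, write: F) -> std::io::Result<()>
where
    F: FnOnce(&mut [u8], &mut [u8]) -> std::io::Result<()>,
{
    // Split the reserved frame once, on behalf of every caller.
    let (key_buffer, value_buffer) = frame.split_at_mut(key_len);
    write(key_buffer, value_buffer)
}

// Usage mirroring the word-docids sender above:
// with_key_value_buffers(&mut frame, key.len(), |key_buffer, value_buffer| {
//     key_buffer.copy_from_slice(key);
//     value_buffer.copy_from_slice(value);
//     Ok(())
// })?;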
+ FacetKind::String | FacetKind::Number => { + let (first, remaining) = value_out.split_first_mut().unwrap(); + *first = 1; + remaining + } + FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_out, + }; - CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?; + CboRoaringBitmapCodec::serialize_into_writer(bitmap, value_out)?; - Ok(()) - }) + Ok(()) + }, + ) } pub fn delete(&self, key: &[u8]) -> crate::Result<()> { @@ -777,32 +784,30 @@ pub struct GeoSender<'a, 'b>(&'a ExtractorBbqueueSender<'b>); impl GeoSender<'_, '_> { pub fn set_rtree(&self, value: Mmap) -> StdResult<(), SendError<()>> { - todo!("set rtree from file") - // self.0 - // .send(WriterOperation::DbOperation(DbOperation { - // database: Database::Main, - // entry: EntryOperation::Write(KeyValueEntry::from_large_key_value( - // GEO_RTREE_KEY.as_bytes(), - // value, - // )), - // })) - // .map_err(|_| SendError(())) + self.0 + .sender + .send(ReceiverAction::LargeEntry { + database: Database::Main, + key: GEO_RTREE_KEY.to_string().into_bytes().into_boxed_slice(), + value, + }) + .map_err(|_| SendError(())) } - pub fn set_geo_faceted(&self, bitmap: &RoaringBitmap) -> StdResult<(), SendError<()>> { - todo!("serialize directly into bbqueue (as a real roaringbitmap not a cbo)") + pub fn set_geo_faceted(&self, bitmap: &RoaringBitmap) -> crate::Result<()> { + let key = GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(); + let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); + let value_length = bitmap.serialized_size(); - // let mut buffer = Vec::new(); - // bitmap.serialize_into(&mut buffer).unwrap(); - - // self.0 - // .send(WriterOperation::DbOperation(DbOperation { - // database: Database::Main, - // entry: EntryOperation::Write(KeyValueEntry::from_small_key_value( - // GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(), - // &buffer, - // )), - // })) - // .map_err(|_| SendError(())) + self.0.write_key_value_with( + Database::Main, + key_length, + value_length, + |key_buffer, value_buffer| { + key_buffer.copy_from_slice(key); + bitmap.serialize_into(value_buffer)?; + Ok(()) + }, + ) } } From 68c4717e215d12da34789d3e49e5d7223468fc4f Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 28 Nov 2024 11:34:35 +0100 Subject: [PATCH 035/158] Change the settings tests and macros to avoid oversights --- .../src/routes/indexes/settings.rs | 514 ++++++++---------- .../tests/settings/get_settings.rs | 35 +- 2 files changed, 274 insertions(+), 275 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index e1794535b..e08047d83 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -17,6 +17,26 @@ use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; +macro_rules! 
make_setting_routes { + ($({$route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident}),*) => { + $( + make_setting_route!($route, $update_verb, $type, $err_ty, $attr, $camelcase_attr, $analytics); + )* + + pub fn configure(cfg: &mut web::ServiceConfig) { + use crate::extractors::sequential_extractor::SeqHandler; + cfg.service( + web::resource("") + .route(web::patch().to(SeqHandler(update_all))) + .route(web::get().to(SeqHandler(get_all))) + .route(web::delete().to(SeqHandler(delete_all)))) + $(.service($attr::resources()))*; + } + + pub const ALL_SETTINGS_NAMES: &[&str] = &[$(stringify!($attr)),*]; + }; +} + #[macro_export] macro_rules! make_setting_route { ($route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident) => { @@ -153,279 +173,227 @@ macro_rules! make_setting_route { }; } -make_setting_route!( - "/filterable-attributes", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes, - >, - filterable_attributes, - "filterableAttributes", - FilterableAttributesAnalytics -); - -make_setting_route!( - "/sortable-attributes", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes, - >, - sortable_attributes, - "sortableAttributes", - SortableAttributesAnalytics -); - -make_setting_route!( - "/displayed-attributes", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes, - >, - displayed_attributes, - "displayedAttributes", - DisplayedAttributesAnalytics -); - -make_setting_route!( - "/typo-tolerance", - patch, - meilisearch_types::settings::TypoSettings, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance, - >, - typo_tolerance, - "typoTolerance", - TypoToleranceAnalytics -); - -make_setting_route!( - "/searchable-attributes", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes, - >, - searchable_attributes, - "searchableAttributes", - SearchableAttributesAnalytics -); - -make_setting_route!( - "/stop-words", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsStopWords, - >, - stop_words, - "stopWords", - StopWordsAnalytics -); - -make_setting_route!( - "/non-separator-tokens", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens, - >, - non_separator_tokens, - "nonSeparatorTokens", - NonSeparatorTokensAnalytics -); - -make_setting_route!( - "/separator-tokens", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens, - >, - separator_tokens, - "separatorTokens", - SeparatorTokensAnalytics -); - -make_setting_route!( - "/dictionary", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsDictionary, - >, - dictionary, - "dictionary", - DictionaryAnalytics -); - -make_setting_route!( - "/synonyms", - put, - std::collections::BTreeMap>, - 
meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms, - >, - synonyms, - "synonyms", - SynonymsAnalytics -); - -make_setting_route!( - "/distinct-attribute", - put, - String, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute, - >, - distinct_attribute, - "distinctAttribute", - DistinctAttributeAnalytics -); - -make_setting_route!( - "/proximity-precision", - put, - meilisearch_types::settings::ProximityPrecisionView, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision, - >, - proximity_precision, - "proximityPrecision", - ProximityPrecisionAnalytics -); - -make_setting_route!( - "/localized-attributes", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes, - >, - localized_attributes, - "localizedAttributes", - LocalesAnalytics -); - -make_setting_route!( - "/ranking-rules", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules, - >, - ranking_rules, - "rankingRules", - RankingRulesAnalytics -); - -make_setting_route!( - "/faceting", - patch, - meilisearch_types::settings::FacetingSettings, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsFaceting, - >, - faceting, - "faceting", - FacetingAnalytics -); - -make_setting_route!( - "/pagination", - patch, - meilisearch_types::settings::PaginationSettings, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsPagination, - >, - pagination, - "pagination", - PaginationAnalytics -); - -make_setting_route!( - "/embedders", - patch, - std::collections::BTreeMap>, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders, - >, - embedders, - "embedders", - EmbeddersAnalytics -); - -make_setting_route!( - "/search-cutoff-ms", - put, - u64, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs, - >, - search_cutoff_ms, - "searchCutoffMs", - SearchCutoffMsAnalytics -); - -make_setting_route!( - "/facet-search", - put, - bool, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsFacetSearch, - >, - facet_search, - "facetSearch", - FacetSearchAnalytics -); - -make_setting_route!( - "/prefix-search", - put, - meilisearch_types::settings::PrefixSearchSettings, - meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsPrefixSearch, - >, - prefix_search, - "prefixSearch", - PrefixSearchAnalytics -); - -macro_rules! 
generate_configure { - ($($mod:ident),*) => { - pub fn configure(cfg: &mut web::ServiceConfig) { - use crate::extractors::sequential_extractor::SeqHandler; - cfg.service( - web::resource("") - .route(web::patch().to(SeqHandler(update_all))) - .route(web::get().to(SeqHandler(get_all))) - .route(web::delete().to(SeqHandler(delete_all)))) - $(.service($mod::resources()))*; - } - }; -} - -generate_configure!( - filterable_attributes, - sortable_attributes, - displayed_attributes, - localized_attributes, - searchable_attributes, - distinct_attribute, - proximity_precision, - stop_words, - separator_tokens, - non_separator_tokens, - dictionary, - synonyms, - ranking_rules, - typo_tolerance, - pagination, - faceting, - embedders, - search_cutoff_ms +make_setting_routes!( + { + "/filterable-attributes", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes, + >, + filterable_attributes, + "filterableAttributes", + FilterableAttributesAnalytics + }, + { + "/sortable-attributes", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes, + >, + sortable_attributes, + "sortableAttributes", + SortableAttributesAnalytics + }, + { + "/displayed-attributes", + put, + Vec, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes, + >, + displayed_attributes, + "displayedAttributes", + DisplayedAttributesAnalytics + }, + { + "/typo-tolerance", + patch, + meilisearch_types::settings::TypoSettings, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance, + >, + typo_tolerance, + "typoTolerance", + TypoToleranceAnalytics + }, + { + "/searchable-attributes", + put, + Vec, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes, + >, + searchable_attributes, + "searchableAttributes", + SearchableAttributesAnalytics + }, + { + "/stop-words", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsStopWords, + >, + stop_words, + "stopWords", + StopWordsAnalytics + }, + { + "/non-separator-tokens", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens, + >, + non_separator_tokens, + "nonSeparatorTokens", + NonSeparatorTokensAnalytics + }, + { + "/separator-tokens", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens, + >, + separator_tokens, + "separatorTokens", + SeparatorTokensAnalytics + }, + { + "/dictionary", + put, + std::collections::BTreeSet, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsDictionary, + >, + dictionary, + "dictionary", + DictionaryAnalytics + }, + { + "/synonyms", + put, + std::collections::BTreeMap>, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms, + >, + synonyms, + "synonyms", + SynonymsAnalytics + }, + { + "/distinct-attribute", + put, + String, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute, + >, + 
distinct_attribute, + "distinctAttribute", + DistinctAttributeAnalytics + }, + { + "/proximity-precision", + put, + meilisearch_types::settings::ProximityPrecisionView, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision, + >, + proximity_precision, + "proximityPrecision", + ProximityPrecisionAnalytics + }, + { + "/localized-attributes", + put, + Vec, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes, + >, + localized_attributes, + "localizedAttributes", + LocalesAnalytics + }, + { + "/ranking-rules", + put, + Vec, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules, + >, + ranking_rules, + "rankingRules", + RankingRulesAnalytics + }, + { + "/faceting", + patch, + meilisearch_types::settings::FacetingSettings, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsFaceting, + >, + faceting, + "faceting", + FacetingAnalytics + }, + { + "/pagination", + patch, + meilisearch_types::settings::PaginationSettings, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsPagination, + >, + pagination, + "pagination", + PaginationAnalytics + }, + { + "/embedders", + patch, + std::collections::BTreeMap>, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders, + >, + embedders, + "embedders", + EmbeddersAnalytics + }, + { + "/search-cutoff-ms", + put, + u64, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs, + >, + search_cutoff_ms, + "searchCutoffMs", + SearchCutoffMsAnalytics + }, + { + "/facet-search", + put, + bool, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsFacetSearch, + >, + facet_search, + "facetSearch", + FacetSearchAnalytics + }, + { + "/prefix-search", + put, + meilisearch_types::settings::PrefixSearchSettings, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsPrefixSearch, + >, + prefix_search, + "prefixSearch", + PrefixSearchAnalytics + } ); pub async fn update_all( diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index 1b1964680..bb1aa861d 100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -37,6 +37,23 @@ static DEFAULT_SETTINGS_VALUES: Lazy> = Lazy::new(| }), ); map.insert("search_cutoff_ms", json!(null)); + map.insert("embedders", json!(null)); + map.insert("facet_search", json!(true)); + map.insert("prefix_search", json!("indexingTime")); + map.insert("proximity_precision", json!("byWord")); + map.insert("sortable_attributes", json!([])); + map.insert( + "typo_tolerance", + json!({ + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 5, + "twoTypos": 9 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }), + ); map }); @@ -343,7 +360,7 @@ async fn error_update_setting_unexisting_index_invalid_uid() { } macro_rules! test_setting_routes { - ($($setting:ident $write_method:ident), *) => { + ($($setting:ident $write_method:ident,) *) => { $( mod $setting { use crate::common::Server; @@ -409,6 +426,14 @@ macro_rules! 
test_setting_routes { } } )* + + #[actix_rt::test] + async fn all_setting_tested() { + let expected = std::collections::BTreeSet::from_iter(meilisearch::routes::indexes::settings::ALL_SETTINGS_NAMES.iter()); + let tested = std::collections::BTreeSet::from_iter([$(stringify!($setting)),*].iter()); + let diff: Vec<_> = expected.difference(&tested).collect(); + assert!(diff.is_empty(), "Not all settings were tested, please add the following settings to the `test_setting_routes!` macro: {:?}", diff); + } }; } @@ -426,7 +451,13 @@ test_setting_routes!( synonyms put, pagination patch, faceting patch, - search_cutoff_ms put + search_cutoff_ms put, + embedders patch, + facet_search put, + prefix_search put, + proximity_precision put, + sortable_attributes put, + typo_tolerance patch, ); #[actix_rt::test] From 9f36ffcbdb2e09799987f9da93660b4ab27d2bcb Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 28 Nov 2024 11:44:09 +0100 Subject: [PATCH 036/158] Polish make_setting_routes! --- .../src/routes/indexes/settings.rs | 284 +++++++++--------- 1 file changed, 142 insertions(+), 142 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index e08047d83..bb24fc880 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -18,7 +18,7 @@ use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; macro_rules! make_setting_routes { - ($({$route:literal, $update_verb:ident, $type:ty, $err_ty:ty, $attr:ident, $camelcase_attr:literal, $analytics:ident}),*) => { + ($({route: $route:literal, update_verb: $update_verb:ident, value_type: $type:ty, err_type: $err_ty:ty, attr: $attr:ident, camelcase_attr: $camelcase_attr:literal, analytics: $analytics:ident},)*) => { $( make_setting_route!($route, $update_verb, $type, $err_ty, $attr, $camelcase_attr, $analytics); )* @@ -175,225 +175,225 @@ macro_rules! 
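The all_setting_tested check above only works because the routes macro also emits the list of setting names it expanded. A minimal sketch of that pattern, with simplified names and none of the actix plumbing (the settings! macro and the hard-coded tested list below are illustrative only):

macro_rules! settings {
    ($($attr:ident),* $(,)?) => {
        // One module per setting; the real macro also generates the routes.
        $(pub mod $attr {})*
        // The same identifiers, exported so tests can diff them against the tested set.
        pub const ALL_SETTINGS_NAMES: &[&str] = &[$(stringify!($attr)),*];
    };
}

settings!(filterable_attributes, stop_words, synonyms);

#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;

    #[test]
    fn every_setting_has_a_test() {
        let expected: BTreeSet<_> = super::ALL_SETTINGS_NAMES.iter().collect();
        // In the real suite this list comes from the test_setting_routes! invocation;
        // dropping an entry here makes the assertion fail, which is the oversight this guards against.
        let tested: BTreeSet<_> = ["filterable_attributes", "stop_words", "synonyms"].iter().collect();
        let missing: Vec<_> = expected.difference(&tested).collect();
        assert!(missing.is_empty(), "untested settings: {missing:?}");
    }
}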
make_setting_route { make_setting_routes!( { - "/filterable-attributes", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< + route: "/filterable-attributes", + update_verb: put, + value_type: std::collections::BTreeSet, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes, >, - filterable_attributes, - "filterableAttributes", - FilterableAttributesAnalytics + attr: filterable_attributes, + camelcase_attr: "filterableAttributes", + analytics: FilterableAttributesAnalytics }, { - "/sortable-attributes", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< + route: "/sortable-attributes", + update_verb: put, + value_type: std::collections::BTreeSet, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsSortableAttributes, >, - sortable_attributes, - "sortableAttributes", - SortableAttributesAnalytics + attr: sortable_attributes, + camelcase_attr: "sortableAttributes", + analytics: SortableAttributesAnalytics }, { - "/displayed-attributes", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< + route: "/displayed-attributes", + update_verb: put, + value_type: Vec, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsDisplayedAttributes, >, - displayed_attributes, - "displayedAttributes", - DisplayedAttributesAnalytics + attr: displayed_attributes, + camelcase_attr: "displayedAttributes", + analytics: DisplayedAttributesAnalytics }, { - "/typo-tolerance", - patch, - meilisearch_types::settings::TypoSettings, - meilisearch_types::deserr::DeserrJsonError< + route: "/typo-tolerance", + update_verb: patch, + value_type: meilisearch_types::settings::TypoSettings, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsTypoTolerance, >, - typo_tolerance, - "typoTolerance", - TypoToleranceAnalytics + attr: typo_tolerance, + camelcase_attr: "typoTolerance", + analytics: TypoToleranceAnalytics }, { - "/searchable-attributes", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< + route: "/searchable-attributes", + update_verb: put, + value_type: Vec, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsSearchableAttributes, >, - searchable_attributes, - "searchableAttributes", - SearchableAttributesAnalytics + attr: searchable_attributes, + camelcase_attr: "searchableAttributes", + analytics: SearchableAttributesAnalytics }, { - "/stop-words", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< + route: "/stop-words", + update_verb: put, + value_type: std::collections::BTreeSet, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsStopWords, >, - stop_words, - "stopWords", - StopWordsAnalytics + attr: stop_words, + camelcase_attr: "stopWords", + analytics: StopWordsAnalytics }, { - "/non-separator-tokens", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< + route: "/non-separator-tokens", + update_verb: put, + value_type: std::collections::BTreeSet, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens, >, - non_separator_tokens, - "nonSeparatorTokens", - NonSeparatorTokensAnalytics + attr: non_separator_tokens, + camelcase_attr: 
"nonSeparatorTokens", + analytics: NonSeparatorTokensAnalytics }, { - "/separator-tokens", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< + route: "/separator-tokens", + update_verb: put, + value_type: std::collections::BTreeSet, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens, >, - separator_tokens, - "separatorTokens", - SeparatorTokensAnalytics + attr: separator_tokens, + camelcase_attr: "separatorTokens", + analytics: SeparatorTokensAnalytics }, { - "/dictionary", - put, - std::collections::BTreeSet, - meilisearch_types::deserr::DeserrJsonError< + route: "/dictionary", + update_verb: put, + value_type: std::collections::BTreeSet, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsDictionary, >, - dictionary, - "dictionary", - DictionaryAnalytics + attr: dictionary, + camelcase_attr: "dictionary", + analytics: DictionaryAnalytics }, { - "/synonyms", - put, - std::collections::BTreeMap>, - meilisearch_types::deserr::DeserrJsonError< + route: "/synonyms", + update_verb: put, + value_type: std::collections::BTreeMap>, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsSynonyms, >, - synonyms, - "synonyms", - SynonymsAnalytics + attr: synonyms, + camelcase_attr: "synonyms", + analytics: SynonymsAnalytics }, { - "/distinct-attribute", - put, - String, - meilisearch_types::deserr::DeserrJsonError< + route: "/distinct-attribute", + update_verb: put, + value_type: String, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsDistinctAttribute, >, - distinct_attribute, - "distinctAttribute", - DistinctAttributeAnalytics + attr: distinct_attribute, + camelcase_attr: "distinctAttribute", + analytics: DistinctAttributeAnalytics }, { - "/proximity-precision", - put, - meilisearch_types::settings::ProximityPrecisionView, - meilisearch_types::deserr::DeserrJsonError< + route: "/proximity-precision", + update_verb: put, + value_type: meilisearch_types::settings::ProximityPrecisionView, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision, >, - proximity_precision, - "proximityPrecision", - ProximityPrecisionAnalytics + attr: proximity_precision, + camelcase_attr: "proximityPrecision", + analytics: ProximityPrecisionAnalytics }, { - "/localized-attributes", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< + route: "/localized-attributes", + update_verb: put, + value_type: Vec, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes, >, - localized_attributes, - "localizedAttributes", - LocalesAnalytics + attr: localized_attributes, + camelcase_attr: "localizedAttributes", + analytics: LocalesAnalytics }, { - "/ranking-rules", - put, - Vec, - meilisearch_types::deserr::DeserrJsonError< + route: "/ranking-rules", + update_verb: put, + value_type: Vec, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsRankingRules, >, - ranking_rules, - "rankingRules", - RankingRulesAnalytics + attr: ranking_rules, + camelcase_attr: "rankingRules", + analytics: RankingRulesAnalytics }, { - "/faceting", - patch, - meilisearch_types::settings::FacetingSettings, - meilisearch_types::deserr::DeserrJsonError< + route: 
"/faceting", + update_verb: patch, + value_type: meilisearch_types::settings::FacetingSettings, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsFaceting, >, - faceting, - "faceting", - FacetingAnalytics + attr: faceting, + camelcase_attr: "faceting", + analytics: FacetingAnalytics }, { - "/pagination", - patch, - meilisearch_types::settings::PaginationSettings, - meilisearch_types::deserr::DeserrJsonError< + route: "/pagination", + update_verb: patch, + value_type: meilisearch_types::settings::PaginationSettings, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsPagination, >, - pagination, - "pagination", - PaginationAnalytics + attr: pagination, + camelcase_attr: "pagination", + analytics: PaginationAnalytics }, { - "/embedders", - patch, - std::collections::BTreeMap>, - meilisearch_types::deserr::DeserrJsonError< + route: "/embedders", + update_verb: patch, + value_type: std::collections::BTreeMap>, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders, >, - embedders, - "embedders", - EmbeddersAnalytics + attr: embedders, + camelcase_attr: "embedders", + analytics: EmbeddersAnalytics }, { - "/search-cutoff-ms", - put, - u64, - meilisearch_types::deserr::DeserrJsonError< + route: "/search-cutoff-ms", + update_verb: put, + value_type: u64, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs, >, - search_cutoff_ms, - "searchCutoffMs", - SearchCutoffMsAnalytics + attr: search_cutoff_ms, + camelcase_attr: "searchCutoffMs", + analytics: SearchCutoffMsAnalytics }, { - "/facet-search", - put, - bool, - meilisearch_types::deserr::DeserrJsonError< + route: "/facet-search", + update_verb: put, + value_type: bool, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsFacetSearch, >, - facet_search, - "facetSearch", - FacetSearchAnalytics + attr: facet_search, + camelcase_attr: "facetSearch", + analytics: FacetSearchAnalytics }, { - "/prefix-search", - put, - meilisearch_types::settings::PrefixSearchSettings, - meilisearch_types::deserr::DeserrJsonError< + route: "/prefix-search", + update_verb: put, + value_type: meilisearch_types::settings::PrefixSearchSettings, + err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsPrefixSearch, >, - prefix_search, - "prefixSearch", - PrefixSearchAnalytics - } + attr: prefix_search, + camelcase_attr: "prefixSearch", + analytics: PrefixSearchAnalytics + }, ); pub async fn update_all( From 58eab9a0182323ba4ce458d026726e7253a51917 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Nov 2024 18:06:43 +0100 Subject: [PATCH 037/158] Send large payload through crossbeam --- crates/milli/src/update/new/channel.rs | 263 ++++++++++++++++++--- crates/milli/src/update/new/indexer/mod.rs | 39 ++- 2 files changed, 266 insertions(+), 36 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 26e375a5a..7eaa50df1 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -1,4 +1,5 @@ use std::cell::RefCell; +use std::io::{self, BufWriter}; use std::marker::PhantomData; use std::mem; use std::num::NonZeroU16; @@ -9,7 +10,7 @@ use bytemuck::{checked, CheckedBitPattern, NoUninit}; use 
crossbeam_channel::SendError; use heed::types::Bytes; use heed::BytesDecode; -use memmap2::Mmap; +use memmap2::{Mmap, MmapMut}; use roaring::RoaringBitmap; use super::extract::FacetKind; @@ -98,20 +99,63 @@ pub struct WriterBbqueueReceiver<'a> { pub enum ReceiverAction { /// Wake up, you have frames to read for the BBQueue buffers. WakeUp, - /// An entry that cannot fit in the BBQueue buffers has been - /// written to disk, memory-mapped and must be written in the - /// database. - LargeEntry { - /// The database where the entry must be written. - database: Database, - /// The key of the entry that must be written in the database. - key: Box<[u8]>, - /// The large value that must be written. - /// - /// Note: We can probably use a `File` here and - /// use `Database::put_reserved` instead of memory-mapping. - value: Mmap, - }, + LargeEntry(LargeEntry), + LargeVector(LargeVector), + LargeVectors(LargeVectors), +} + +/// An entry that cannot fit in the BBQueue buffers has been +/// written to disk, memory-mapped and must be written in the +/// database. +#[derive(Debug)] +pub struct LargeEntry { + /// The database where the entry must be written. + pub database: Database, + /// The key of the entry that must be written in the database. + pub key: Box<[u8]>, + /// The large value that must be written. + /// + /// Note: We can probably use a `File` here and + /// use `Database::put_reserved` instead of memory-mapping. + pub value: Mmap, +} + +/// When an embedding is larger than the available +/// BBQueue space it arrives here. +#[derive(Debug)] +pub struct LargeVector { + /// The document id associated to the large embedding. + pub docid: DocumentId, + /// The embedder id in which to insert the large embedding. + pub embedder_id: u8, + /// The large embedding that must be written. + pub embedding: Mmap, +} + +impl LargeVector { + pub fn read_embedding(&self) -> &[f32] { + bytemuck::cast_slice(&self.embedding) + } +} + +/// When embeddings are larger than the available +/// BBQueue space it arrives here. +#[derive(Debug)] +pub struct LargeVectors { + /// The document id associated to the large embedding. + pub docid: DocumentId, + /// The embedder id in which to insert the large embedding. + pub embedder_id: u8, + /// The dimensions of the embeddings in this payload. + pub dimensions: u16, + /// The large embedding that must be written. + pub embeddings: Mmap, +} + +impl LargeVectors { + pub fn read_embeddings(&self) -> impl Iterator { + self.embeddings.chunks_exact(self.dimensions as usize).map(bytemuck::cast_slice) + } } impl<'a> WriterBbqueueReceiver<'a> { @@ -209,12 +253,55 @@ impl ArroySetVector { } } +#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] +#[repr(C)] +/// The embeddings are in the remaining space and represents +/// non-aligned [f32] each with dimensions f32s. +pub struct ArroySetVectors { + pub docid: DocumentId, + pub dimensions: u16, + pub embedder_id: u8, + _padding: u8, +} + +impl ArroySetVectors { + fn remaining_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] { + let skip = EntryHeader::variant_size() + mem::size_of::(); + &frame[skip..] + } + + // /// The number of embeddings in this payload. + // pub fn embedding_count(&self, frame: &FrameGrantR<'_>) -> usize { + // let bytes = Self::remaining_bytes(frame); + // bytes.len().checked_div(self.dimensions as usize).unwrap() + // } + + /// Read the embedding at `index` or `None` if out of bounds. 
+ pub fn read_embedding_into_vec<'v>( + &self, + frame: &FrameGrantR<'_>, + index: usize, + vec: &'v mut Vec, + ) -> Option<&'v [f32]> { + vec.clear(); + let bytes = Self::remaining_bytes(frame); + let embedding_size = self.dimensions as usize * mem::size_of::(); + let embedding_bytes = bytes.chunks_exact(embedding_size).nth(index)?; + embedding_bytes.chunks_exact(mem::size_of::()).for_each(|bytes| { + let f = bytes.try_into().map(f32::from_ne_bytes).unwrap(); + vec.push(f); + }); + Some(&vec[..]) + } +} + #[derive(Debug, Clone, Copy)] #[repr(u8)] pub enum EntryHeader { DbOperation(DbOperation), ArroyDeleteVector(ArroyDeleteVector), ArroySetVector(ArroySetVector), + ArroySetVectors(ArroySetVectors), } impl EntryHeader { @@ -227,6 +314,7 @@ impl EntryHeader { EntryHeader::DbOperation(_) => 0, EntryHeader::ArroyDeleteVector(_) => 1, EntryHeader::ArroySetVector(_) => 2, + EntryHeader::ArroySetVectors(_) => 3, } } @@ -245,11 +333,15 @@ impl EntryHeader { Self::variant_size() + mem::size_of::() } - /// The `embedding_length` corresponds to the number of `f32` in the embedding. - fn total_set_vector_size(embedding_length: usize) -> usize { - Self::variant_size() - + mem::size_of::() - + embedding_length * mem::size_of::() + /// The `dimensions` corresponds to the number of `f32` in the embedding. + fn total_set_vector_size(dimensions: usize) -> usize { + Self::variant_size() + mem::size_of::() + dimensions * mem::size_of::() + } + + /// The `dimensions` corresponds to the number of `f32` in the embedding. + fn total_set_vectors_size(count: usize, dimensions: usize) -> usize { + let embedding_size = dimensions * mem::size_of::(); + Self::variant_size() + mem::size_of::() + embedding_size * count } fn header_size(&self) -> usize { @@ -257,6 +349,7 @@ impl EntryHeader { EntryHeader::DbOperation(op) => mem::size_of_val(op), EntryHeader::ArroyDeleteVector(adv) => mem::size_of_val(adv), EntryHeader::ArroySetVector(asv) => mem::size_of_val(asv), + EntryHeader::ArroySetVectors(asvs) => mem::size_of_val(asvs), }; Self::variant_size() + payload_size } @@ -279,6 +372,11 @@ impl EntryHeader { let header = checked::pod_read_unaligned(header_bytes); EntryHeader::ArroySetVector(header) } + 3 => { + let header_bytes = &remaining[..mem::size_of::()]; + let header = checked::pod_read_unaligned(header_bytes); + EntryHeader::ArroySetVectors(header) + } id => panic!("invalid variant id: {id}"), } } @@ -289,6 +387,7 @@ impl EntryHeader { EntryHeader::DbOperation(op) => bytemuck::bytes_of(op), EntryHeader::ArroyDeleteVector(adv) => bytemuck::bytes_of(adv), EntryHeader::ArroySetVector(asv) => bytemuck::bytes_of(asv), + EntryHeader::ArroySetVectors(asvs) => bytemuck::bytes_of(asvs), }; *first = self.variant_id(); remaining.copy_from_slice(payload_bytes); @@ -405,7 +504,7 @@ impl<'b> ExtractorBbqueueSender<'b> { let payload_header = EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }); let total_length = EntryHeader::total_delete_vector_size(); if total_length > capacity { - unreachable!("entry larger that the BBQueue capacity"); + panic!("The entry is larger ({total_length} bytes) than the BBQueue capacity ({capacity} bytes)"); } // Spin loop to have a frame the size we requested. 
@@ -441,11 +540,21 @@ impl<'b> ExtractorBbqueueSender<'b> { let refcell = self.producers.get().unwrap(); let mut producer = refcell.0.borrow_mut_or_yield(); - let payload_header = - EntryHeader::ArroySetVector(ArroySetVector { docid, embedder_id, _padding: [0; 3] }); + let arroy_set_vector = ArroySetVector { docid, embedder_id, _padding: [0; 3] }; + let payload_header = EntryHeader::ArroySetVector(arroy_set_vector); let total_length = EntryHeader::total_set_vector_size(embedding.len()); if total_length > capacity { - unreachable!("entry larger that the BBQueue capacity"); + let mut embedding_bytes = bytemuck::cast_slice(embedding); + let mut value_file = tempfile::tempfile().map(BufWriter::new)?; + io::copy(&mut embedding_bytes, &mut value_file)?; + let value_file = value_file.into_inner().map_err(|ie| ie.into_error())?; + value_file.sync_all()?; + let embedding = unsafe { Mmap::map(&value_file)? }; + + let large_vector = LargeVector { docid, embedder_id, embedding }; + self.sender.send(ReceiverAction::LargeVector(large_vector)).unwrap(); + + return Ok(()); } // Spin loop to have a frame the size we requested. @@ -457,7 +566,6 @@ impl<'b> ExtractorBbqueueSender<'b> { } }; - // payload_header.serialize_into(&mut grant); let header_size = payload_header.header_size(); let (header_bytes, remaining) = grant.split_at_mut(header_size); payload_header.serialize_into(header_bytes); @@ -475,6 +583,83 @@ impl<'b> ExtractorBbqueueSender<'b> { Ok(()) } + fn set_vectors( + &self, + docid: u32, + embedder_id: u8, + embeddings: &[Vec], + ) -> crate::Result<()> { + let capacity = self.capacity; + let refcell = self.producers.get().unwrap(); + let mut producer = refcell.0.borrow_mut_or_yield(); + + let dimensions = match embeddings.first() { + Some(embedding) => embedding.len(), + None => return Ok(()), + }; + + let arroy_set_vector = ArroySetVectors { + docid, + dimensions: dimensions.try_into().unwrap(), + embedder_id, + _padding: 0, + }; + + let payload_header = EntryHeader::ArroySetVectors(arroy_set_vector); + let total_length = EntryHeader::total_set_vectors_size(embeddings.len(), dimensions); + if total_length > capacity { + let mut value_file = tempfile::tempfile().map(BufWriter::new)?; + for embedding in embeddings { + let mut embedding_bytes = bytemuck::cast_slice(embedding); + io::copy(&mut embedding_bytes, &mut value_file)?; + } + + let value_file = value_file.into_inner().map_err(|ie| ie.into_error())?; + value_file.sync_all()?; + let embeddings = unsafe { Mmap::map(&value_file)? }; + + let large_vectors = LargeVectors { + docid, + embedder_id, + dimensions: dimensions.try_into().unwrap(), + embeddings, + }; + + self.sender.send(ReceiverAction::LargeVectors(large_vectors)).unwrap(); + + return Ok(()); + } + + // Spin loop to have a frame the size we requested. + let mut grant = loop { + match producer.grant(total_length) { + Ok(grant) => break grant, + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + } + }; + + let header_size = payload_header.header_size(); + let (header_bytes, remaining) = grant.split_at_mut(header_size); + payload_header.serialize_into(header_bytes); + + let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::()); + for (embedding, output) in embeddings.iter().zip(output_iter) { + output.copy_from_slice(bytemuck::cast_slice(embedding)); + } + + // We could commit only the used memory. 
+ grant.commit(total_length); + + // We only send a wake up message when the channel is empty + // so that we don't fill the channel with too many WakeUps. + if self.sender.is_empty() { + self.sender.send(ReceiverAction::WakeUp).unwrap(); + } + + Ok(()) + } + fn write_key_value(&self, database: Database, key: &[u8], value: &[u8]) -> crate::Result<()> { let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); self.write_key_value_with(database, key_length, value.len(), |key_buffer, value_buffer| { @@ -502,7 +687,22 @@ impl<'b> ExtractorBbqueueSender<'b> { let payload_header = EntryHeader::DbOperation(operation); let total_length = EntryHeader::total_key_value_size(key_length, value_length); if total_length > capacity { - unreachable!("entry larger that the BBQueue capacity"); + let mut key_buffer = vec![0; key_length.get() as usize].into_boxed_slice(); + let value_file = tempfile::tempfile()?; + value_file.set_len(value_length.try_into().unwrap())?; + let mut mmap_mut = unsafe { MmapMut::map_mut(&value_file)? }; + + key_value_writer(&mut key_buffer, &mut mmap_mut)?; + + self.sender + .send(ReceiverAction::LargeEntry(LargeEntry { + database, + key: key_buffer, + value: mmap_mut.make_read_only()?, + })) + .unwrap(); + + return Ok(()); } // Spin loop to have a frame the size we requested. @@ -559,7 +759,7 @@ impl<'b> ExtractorBbqueueSender<'b> { let payload_header = EntryHeader::DbOperation(operation); let total_length = EntryHeader::total_key_size(key_length); if total_length > capacity { - unreachable!("entry larger that the BBQueue capacity"); + panic!("The entry is larger ({total_length} bytes) than the BBQueue capacity ({capacity} bytes)"); } // Spin loop to have a frame the size we requested. @@ -763,10 +963,7 @@ impl EmbeddingSender<'_, '_> { embedder_id: u8, embeddings: Vec, ) -> crate::Result<()> { - for embedding in embeddings { - self.set_vector(docid, embedder_id, embedding)?; - } - Ok(()) + self.0.set_vectors(docid, embedder_id, &embeddings[..]) } pub fn set_vector( @@ -786,11 +983,11 @@ impl GeoSender<'_, '_> { pub fn set_rtree(&self, value: Mmap) -> StdResult<(), SendError<()>> { self.0 .sender - .send(ReceiverAction::LargeEntry { + .send(ReceiverAction::LargeEntry(LargeEntry { database: Database::Main, key: GEO_RTREE_KEY.to_string().into_bytes().into_boxed_slice(), value, - }) + })) .map_err(|_| SendError(())) } diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 3a4406aef..9ad7a8f0b 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -16,6 +16,7 @@ use rand::SeedableRng as _; use raw_collections::RawMap; use time::OffsetDateTime; pub use update_by_function::UpdateByFunction; +use {LargeEntry, LargeVector}; use super::channel::*; use super::extract::*; @@ -40,7 +41,7 @@ use crate::update::new::words_prefix_docids::compute_exact_word_prefix_docids; use crate::update::new::{merge_and_send_docids, merge_and_send_facet_docids, FacetDatabases}; use crate::update::settings::InnerIndexSettings; use crate::update::{FacetsUpdateBulk, GrenadParameters}; -use crate::vector::{ArroyWrapper, EmbeddingConfigs}; +use crate::vector::{ArroyWrapper, EmbeddingConfigs, Embeddings}; use crate::{ Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder, UserError, @@ -132,7 +133,8 @@ where { let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents"); let _entered = 
span.enter(); - extract(document_changes, + extract( + document_changes, &document_extractor, indexing_context, &mut extractor_allocs, @@ -416,7 +418,7 @@ where match action { ReceiverAction::WakeUp => (), - ReceiverAction::LargeEntry { database, key, value } => { + ReceiverAction::LargeEntry(LargeEntry { database, key, value }) => { let database_name = database.database_name(); let database = database.database(index); if let Err(error) = database.put(wtxn, &key, &value) { @@ -428,6 +430,24 @@ where })); } } + ReceiverAction::LargeVector(large_vector) => { + let embedding = large_vector.read_embedding(); + let LargeVector { docid, embedder_id, .. } = large_vector; + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_item(wtxn, docid, embedding)?; + } + ReceiverAction::LargeVectors(large_vectors) => { + let LargeVectors { docid, embedder_id, .. } = large_vectors; + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + writer.del_items(wtxn, *dimensions, docid)?; + let mut embeddings = Embeddings::new(*dimensions); + for embedding in large_vectors.read_embeddings() { + embeddings.push(embedding.to_vec()).unwrap(); + } + } } // Every time the is a message in the channel we search @@ -582,6 +602,19 @@ fn write_from_bbqueue( writer.del_items(wtxn, *dimensions, docid)?; writer.add_item(wtxn, docid, embedding)?; } + EntryHeader::ArroySetVectors(asvs) => { + let ArroySetVectors { docid, embedder_id, .. } = asvs; + let frame = frame_with_header.frame(); + let (_, _, writer, dimensions) = + arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + writer.del_items(wtxn, *dimensions, docid)?; + for index in 0.. { + match asvs.read_embedding_into_vec(frame, index, aligned_embedding) { + Some(embedding) => writer.add_item(wtxn, docid, embedding)?, + None => break, + } + } + } } } From 5383f41bba83f522a43c993e6c6261042d430232 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 28 Nov 2024 11:55:38 +0100 Subject: [PATCH 038/158] Polish test_setting_routes! 
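
The test macro entries become labelled and brace-delimited, in the same style as
make_setting_routes! above. As a rough, hypothetical sketch (this is not the
macro touched by this patch, only an illustration of the pattern), entries of
the form `{ setting: ..., update_verb: ..., default_value: ... }` can be
matched by a declarative macro like this:

    // Minimal, self-contained sketch of matching labelled, brace-delimited
    // entries in a declarative macro. `describe_settings!` is a hypothetical
    // helper used only for illustration.
    macro_rules! describe_settings {
        ($({ setting: $setting:ident, update_verb: $update_verb:ident, default_value: $default_value:tt },)*) => {
            /// Returns one human-readable line per declared setting.
            pub fn describe() -> Vec<String> {
                vec![
                    $(format!(
                        "{}: updated with {}, defaults to {}",
                        stringify!($setting),
                        stringify!($update_verb),
                        stringify!($default_value),
                    )),*
                ]
            }
        };
    }

    describe_settings!(
        { setting: stop_words, update_verb: put, default_value: [] },
        { setting: pagination, update_verb: patch, default_value: {"maxTotalHits": 1000} },
    );

    fn main() {
        for line in describe() {
            println!("{line}");
        }
    }

Naming the fields in the matcher keeps long invocations readable and lets an
entry gain new fields (such as default_value) without relying on positional
order.
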
--- .../tests/settings/get_settings.rs | 187 ++++++++++-------- 1 file changed, 105 insertions(+), 82 deletions(-) diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index bb1aa861d..b9e10033a 100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -1,62 +1,6 @@ -use std::collections::HashMap; - -use once_cell::sync::Lazy; - -use crate::common::{Server, Value}; +use crate::common::Server; use crate::json; -static DEFAULT_SETTINGS_VALUES: Lazy> = Lazy::new(|| { - let mut map = HashMap::new(); - map.insert("displayed_attributes", json!(["*"])); - map.insert("searchable_attributes", json!(["*"])); - map.insert("localized_attributes", json!(null)); - map.insert("filterable_attributes", json!([])); - map.insert("distinct_attribute", json!(null)); - map.insert( - "ranking_rules", - json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]), - ); - map.insert("stop_words", json!([])); - map.insert("non_separator_tokens", json!([])); - map.insert("separator_tokens", json!([])); - map.insert("dictionary", json!([])); - map.insert("synonyms", json!({})); - map.insert( - "faceting", - json!({ - "maxValuesPerFacet": json!(100), - "sortFacetValuesBy": { - "*": "alpha" - } - }), - ); - map.insert( - "pagination", - json!({ - "maxTotalHits": json!(1000), - }), - ); - map.insert("search_cutoff_ms", json!(null)); - map.insert("embedders", json!(null)); - map.insert("facet_search", json!(true)); - map.insert("prefix_search", json!("indexingTime")); - map.insert("proximity_precision", json!("byWord")); - map.insert("sortable_attributes", json!([])); - map.insert( - "typo_tolerance", - json!({ - "enabled": true, - "minWordSizeForTypos": { - "oneTypo": 5, - "twoTypos": 9 - }, - "disableOnWords": [], - "disableOnAttributes": [] - }), - ); - map -}); - #[actix_rt::test] async fn get_settings_unexisting_index() { let server = Server::new().await; @@ -360,11 +304,10 @@ async fn error_update_setting_unexisting_index_invalid_uid() { } macro_rules! test_setting_routes { - ($($setting:ident $write_method:ident,) *) => { + ($({setting: $setting:ident, update_verb: $update_verb:ident, default_value: $default_value:tt},) *) => { $( mod $setting { use crate::common::Server; - use super::DEFAULT_SETTINGS_VALUES; #[actix_rt::test] async fn get_unexisting_index() { @@ -386,7 +329,7 @@ macro_rules! test_setting_routes { .chars() .map(|c| if c == '_' { '-' } else { c }) .collect::()); - let (response, code) = server.service.$write_method(url, serde_json::Value::Null.into()).await; + let (response, code) = server.service.$update_verb(url, serde_json::Value::Null.into()).await; assert_eq!(code, 202, "{}", response); server.index("").wait_task(0).await; let (response, code) = server.index("test").get().await; @@ -421,8 +364,8 @@ macro_rules! test_setting_routes { .collect::()); let (response, code) = server.service.get(url).await; assert_eq!(code, 200, "{}", response); - let expected = DEFAULT_SETTINGS_VALUES.get(stringify!($setting)).unwrap(); - assert_eq!(expected, &response); + let expected = crate::json!($default_value); + assert_eq!(expected, response); } } )* @@ -438,26 +381,106 @@ macro_rules! 
test_setting_routes { } test_setting_routes!( - filterable_attributes put, - displayed_attributes put, - localized_attributes put, - searchable_attributes put, - distinct_attribute put, - stop_words put, - separator_tokens put, - non_separator_tokens put, - dictionary put, - ranking_rules put, - synonyms put, - pagination patch, - faceting patch, - search_cutoff_ms put, - embedders patch, - facet_search put, - prefix_search put, - proximity_precision put, - sortable_attributes put, - typo_tolerance patch, + { + setting: filterable_attributes, + update_verb: put, + default_value: [] + }, + { + setting: displayed_attributes, + update_verb: put, + default_value: ["*"] + }, + { + setting: localized_attributes, + update_verb: put, + default_value: null + }, + { + setting: searchable_attributes, + update_verb: put, + default_value: ["*"] + }, + { + setting: distinct_attribute, + update_verb: put, + default_value: null + }, + { + setting: stop_words, + update_verb: put, + default_value: [] + }, + { + setting: separator_tokens, + update_verb: put, + default_value: [] + }, + { + setting: non_separator_tokens, + update_verb: put, + default_value: [] + }, + { + setting: dictionary, + update_verb: put, + default_value: [] + }, + { + setting: ranking_rules, + update_verb: put, + default_value: ["words", "typo", "proximity", "attribute", "sort", "exactness"] + }, + { + setting: synonyms, + update_verb: put, + default_value: {} + }, + { + setting: pagination, + update_verb: patch, + default_value: {"maxTotalHits": 1000} + }, + { + setting: faceting, + update_verb: patch, + default_value: {"maxValuesPerFacet": 100, "sortFacetValuesBy": {"*": "alpha"}} + }, + { + setting: search_cutoff_ms, + update_verb: put, + default_value: null + }, + { + setting: embedders, + update_verb: patch, + default_value: null + }, + { + setting: facet_search, + update_verb: put, + default_value: true + }, + { + setting: prefix_search, + update_verb: put, + default_value: "indexingTime" + }, + { + setting: proximity_precision, + update_verb: put, + default_value: "byWord" + }, + { + setting: sortable_attributes, + update_verb: put, + default_value: [] + }, + { + setting: typo_tolerance, + update_verb: patch, + default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": []} + }, ); #[actix_rt::test] From cc4bd54669b64b6fa195616fb18ca7da38c299a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 28 Nov 2024 13:53:25 +0100 Subject: [PATCH 039/158] Correctly construct the Embeddings struct --- crates/milli/src/update/new/channel.rs | 14 ++++++++++++++ crates/milli/src/update/new/indexer/mod.rs | 13 ++++++------- crates/milli/src/vector/mod.rs | 2 +- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 7eaa50df1..237c19a5c 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -293,6 +293,20 @@ impl ArroySetVectors { }); Some(&vec[..]) } + + /// Read all the embeddings and write them into an aligned `f32` Vec. + pub fn read_all_embeddings_into_vec<'v>( + &self, + frame: &FrameGrantR<'_>, + vec: &'v mut Vec, + ) -> &'v [f32] { + vec.clear(); + Self::remaining_bytes(frame).chunks_exact(mem::size_of::()).for_each(|bytes| { + let f = bytes.try_into().map(f32::from_ne_bytes).unwrap(); + vec.push(f); + }); + &vec[..] 
+ } } #[derive(Debug, Clone, Copy)] diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 9ad7a8f0b..a8a94cb7c 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -442,11 +442,12 @@ where let LargeVectors { docid, embedder_id, .. } = large_vectors; let (_, _, writer, dimensions) = arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - writer.del_items(wtxn, *dimensions, docid)?; let mut embeddings = Embeddings::new(*dimensions); for embedding in large_vectors.read_embeddings() { embeddings.push(embedding.to_vec()).unwrap(); } + writer.del_items(wtxn, *dimensions, docid)?; + writer.add_items(wtxn, docid, &embeddings)?; } } @@ -607,13 +608,11 @@ fn write_from_bbqueue( let frame = frame_with_header.frame(); let (_, _, writer, dimensions) = arroy_writers.get(&embedder_id).expect("requested a missing embedder"); + let mut embeddings = Embeddings::new(*dimensions); + let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding); + embeddings.append(all_embeddings.to_vec()).unwrap(); writer.del_items(wtxn, *dimensions, docid)?; - for index in 0.. { - match asvs.read_embedding_into_vec(frame, index, aligned_embedding) { - Some(embedding) => writer.add_item(wtxn, docid, embedding)?, - None => break, - } - } + writer.add_items(wtxn, docid, &embeddings)?; } } } diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index 3047e6dfc..a1d71ef93 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -475,7 +475,7 @@ impl Embeddings { Ok(()) } - /// Append a flat vector of embeddings a the end of the embeddings. + /// Append a flat vector of embeddings at the end of the embeddings. /// /// If `embeddings.len() % self.dimension != 0`, then the append operation fails. 
pub fn append(&mut self, mut embeddings: Vec) -> Result<(), Vec> { From 3dc87f5baacc649483b30d76aab251a3b8ebed30 Mon Sep 17 00:00:00 2001 From: curquiza Date: Thu, 28 Nov 2024 14:33:05 +0100 Subject: [PATCH 040/158] Update mini-dashboard to v0.2.16 version --- crates/meilisearch/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml index 2884f0c9c..4f357157e 100644 --- a/crates/meilisearch/Cargo.toml +++ b/crates/meilisearch/Cargo.toml @@ -157,5 +157,5 @@ german = ["meilisearch-types/german"] turkish = ["meilisearch-types/turkish"] [package.metadata.mini-dashboard] -assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.15/build.zip" -sha1 = "d057600b4a839a2e0c0be7a372cd1b2683f3ca7e" +assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.16/build.zip" +sha1 = "68f83438a114aabbe76bc9fe480071e741996662" From 096a28656ee3c1bba1900f2335e33a8a88677070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 28 Nov 2024 15:15:06 +0100 Subject: [PATCH 041/158] Fix a bug around deleting all the vectors of a doc --- crates/milli/src/update/new/channel.rs | 68 ++++++--------------- crates/milli/src/update/new/indexer/mod.rs | 7 ++- crates/milli/src/update/new/ref_cell_ext.rs | 1 + 3 files changed, 23 insertions(+), 53 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 237c19a5c..38f436837 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -146,15 +146,13 @@ pub struct LargeVectors { pub docid: DocumentId, /// The embedder id in which to insert the large embedding. pub embedder_id: u8, - /// The dimensions of the embeddings in this payload. - pub dimensions: u16, /// The large embedding that must be written. pub embeddings: Mmap, } impl LargeVectors { - pub fn read_embeddings(&self) -> impl Iterator { - self.embeddings.chunks_exact(self.dimensions as usize).map(bytemuck::cast_slice) + pub fn read_embeddings(&self, dimensions: usize) -> impl Iterator { + self.embeddings.chunks_exact(dimensions).map(bytemuck::cast_slice) } } @@ -241,15 +239,18 @@ impl ArroySetVector { &self, frame: &FrameGrantR<'_>, vec: &'v mut Vec, - ) -> &'v [f32] { + ) -> Option<&'v [f32]> { vec.clear(); let skip = EntryHeader::variant_size() + mem::size_of::(); let bytes = &frame[skip..]; + if bytes.is_empty() { + return None; + } bytes.chunks_exact(mem::size_of::()).for_each(|bytes| { let f = bytes.try_into().map(f32::from_ne_bytes).unwrap(); vec.push(f); }); - &vec[..] + Some(&vec[..]) } } @@ -259,9 +260,8 @@ impl ArroySetVector { /// non-aligned [f32] each with dimensions f32s. pub struct ArroySetVectors { pub docid: DocumentId, - pub dimensions: u16, pub embedder_id: u8, - _padding: u8, + _padding: [u8; 3], } impl ArroySetVectors { @@ -270,30 +270,6 @@ impl ArroySetVectors { &frame[skip..] } - // /// The number of embeddings in this payload. - // pub fn embedding_count(&self, frame: &FrameGrantR<'_>) -> usize { - // let bytes = Self::remaining_bytes(frame); - // bytes.len().checked_div(self.dimensions as usize).unwrap() - // } - - /// Read the embedding at `index` or `None` if out of bounds. 
- pub fn read_embedding_into_vec<'v>( - &self, - frame: &FrameGrantR<'_>, - index: usize, - vec: &'v mut Vec, - ) -> Option<&'v [f32]> { - vec.clear(); - let bytes = Self::remaining_bytes(frame); - let embedding_size = self.dimensions as usize * mem::size_of::(); - let embedding_bytes = bytes.chunks_exact(embedding_size).nth(index)?; - embedding_bytes.chunks_exact(mem::size_of::()).for_each(|bytes| { - let f = bytes.try_into().map(f32::from_ne_bytes).unwrap(); - vec.push(f); - }); - Some(&vec[..]) - } - /// Read all the embeddings and write them into an aligned `f32` Vec. pub fn read_all_embeddings_into_vec<'v>( &self, @@ -607,18 +583,14 @@ impl<'b> ExtractorBbqueueSender<'b> { let refcell = self.producers.get().unwrap(); let mut producer = refcell.0.borrow_mut_or_yield(); + // If there are no vector we specify the dimensions + // to zero to allocate no extra space at all let dimensions = match embeddings.first() { Some(embedding) => embedding.len(), - None => return Ok(()), - }; - - let arroy_set_vector = ArroySetVectors { - docid, - dimensions: dimensions.try_into().unwrap(), - embedder_id, - _padding: 0, + None => 0, }; + let arroy_set_vector = ArroySetVectors { docid, embedder_id, _padding: [0; 3] }; let payload_header = EntryHeader::ArroySetVectors(arroy_set_vector); let total_length = EntryHeader::total_set_vectors_size(embeddings.len(), dimensions); if total_length > capacity { @@ -632,13 +604,7 @@ impl<'b> ExtractorBbqueueSender<'b> { value_file.sync_all()?; let embeddings = unsafe { Mmap::map(&value_file)? }; - let large_vectors = LargeVectors { - docid, - embedder_id, - dimensions: dimensions.try_into().unwrap(), - embeddings, - }; - + let large_vectors = LargeVectors { docid, embedder_id, embeddings }; self.sender.send(ReceiverAction::LargeVectors(large_vectors)).unwrap(); return Ok(()); @@ -657,9 +623,11 @@ impl<'b> ExtractorBbqueueSender<'b> { let (header_bytes, remaining) = grant.split_at_mut(header_size); payload_header.serialize_into(header_bytes); - let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::()); - for (embedding, output) in embeddings.iter().zip(output_iter) { - output.copy_from_slice(bytemuck::cast_slice(embedding)); + if dimensions != 0 { + let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::()); + for (embedding, output) in embeddings.iter().zip(output_iter) { + output.copy_from_slice(bytemuck::cast_slice(embedding)); + } } // We could commit only the used memory. diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index a8a94cb7c..07cb9d69e 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -443,7 +443,7 @@ where let (_, _, writer, dimensions) = arroy_writers.get(&embedder_id).expect("requested a missing embedder"); let mut embeddings = Embeddings::new(*dimensions); - for embedding in large_vectors.read_embeddings() { + for embedding in large_vectors.read_embeddings(*dimensions) { embeddings.push(embedding.to_vec()).unwrap(); } writer.del_items(wtxn, *dimensions, docid)?; @@ -597,11 +597,12 @@ fn write_from_bbqueue( EntryHeader::ArroySetVector(asv) => { let ArroySetVector { docid, embedder_id, .. 
} = asv; let frame = frame_with_header.frame(); - let embedding = asv.read_embedding_into_vec(frame, aligned_embedding); let (_, _, writer, dimensions) = arroy_writers.get(&embedder_id).expect("requested a missing embedder"); writer.del_items(wtxn, *dimensions, docid)?; - writer.add_item(wtxn, docid, embedding)?; + if let Some(embedding) = asv.read_embedding_into_vec(frame, aligned_embedding) { + writer.add_item(wtxn, docid, embedding)?; + } } EntryHeader::ArroySetVectors(asvs) => { let ArroySetVectors { docid, embedder_id, .. } = asvs; diff --git a/crates/milli/src/update/new/ref_cell_ext.rs b/crates/milli/src/update/new/ref_cell_ext.rs index c66f4af0a..77f5fa800 100644 --- a/crates/milli/src/update/new/ref_cell_ext.rs +++ b/crates/milli/src/update/new/ref_cell_ext.rs @@ -5,6 +5,7 @@ pub trait RefCellExt { &self, ) -> std::result::Result, std::cell::BorrowMutError>; + #[track_caller] fn borrow_mut_or_yield(&self) -> RefMut<'_, T> { self.try_borrow_mut_or_yield().unwrap() } From 90b428a8c3d5930133870cb14d5e950baed1a1ad Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 28 Nov 2024 15:16:13 +0100 Subject: [PATCH 042/158] Apply change requests --- .../src/routes/indexes/settings.rs | 6 + .../tests/settings/get_settings.rs | 360 +++++++++--------- 2 files changed, 186 insertions(+), 180 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index bb24fc880..b2922e5ff 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -17,6 +17,12 @@ use crate::extractors::authentication::GuardedData; use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView}; use crate::Opt; +/// This macro generates the routes for the settings. +/// +/// It takes a list of settings and generates a module for each setting. +/// Each module contains the `get`, `update` and `delete` routes for the setting. +/// +/// It also generates a `configure` function that configures the routes for the settings. macro_rules! make_setting_routes { ($({route: $route:literal, update_verb: $update_verb:ident, value_type: $type:ty, err_type: $err_ty:ty, attr: $attr:ident, camelcase_attr: $camelcase_attr:literal, analytics: $analytics:ident},)*) => { $( diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index b9e10033a..55d9441ee 100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -1,6 +1,186 @@ use crate::common::Server; use crate::json; +macro_rules! 
test_setting_routes { + ($({setting: $setting:ident, update_verb: $update_verb:ident, default_value: $default_value:tt},) *) => { + $( + mod $setting { + use crate::common::Server; + + #[actix_rt::test] + async fn get_unexisting_index() { + let server = Server::new().await; + let url = format!("/indexes/test/settings/{}", + stringify!($setting) + .chars() + .map(|c| if c == '_' { '-' } else { c }) + .collect::()); + let (_response, code) = server.service.get(url).await; + assert_eq!(code, 404); + } + + #[actix_rt::test] + async fn update_unexisting_index() { + let server = Server::new().await; + let url = format!("/indexes/test/settings/{}", + stringify!($setting) + .chars() + .map(|c| if c == '_' { '-' } else { c }) + .collect::()); + let (response, code) = server.service.$update_verb(url, serde_json::Value::Null.into()).await; + assert_eq!(code, 202, "{}", response); + server.index("").wait_task(0).await; + let (response, code) = server.index("test").get().await; + assert_eq!(code, 200, "{}", response); + } + + #[actix_rt::test] + async fn delete_unexisting_index() { + let server = Server::new().await; + let url = format!("/indexes/test/settings/{}", + stringify!($setting) + .chars() + .map(|c| if c == '_' { '-' } else { c }) + .collect::()); + let (_, code) = server.service.delete(url).await; + assert_eq!(code, 202); + let response = server.index("").wait_task(0).await; + assert_eq!(response["status"], "failed"); + } + + #[actix_rt::test] + async fn get_default() { + let server = Server::new().await; + let index = server.index("test"); + let (response, code) = index.create(None).await; + assert_eq!(code, 202, "{}", response); + index.wait_task(0).await; + let url = format!("/indexes/test/settings/{}", + stringify!($setting) + .chars() + .map(|c| if c == '_' { '-' } else { c }) + .collect::()); + let (response, code) = server.service.get(url).await; + assert_eq!(code, 200, "{}", response); + let expected = crate::json!($default_value); + assert_eq!(expected, response); + } + } + )* + + #[actix_rt::test] + async fn all_setting_tested() { + let expected = std::collections::BTreeSet::from_iter(meilisearch::routes::indexes::settings::ALL_SETTINGS_NAMES.iter()); + let tested = std::collections::BTreeSet::from_iter([$(stringify!($setting)),*].iter()); + let diff: Vec<_> = expected.difference(&tested).collect(); + assert!(diff.is_empty(), "Not all settings were tested, please add the following settings to the `test_setting_routes!` macro: {:?}", diff); + } + }; +} + +test_setting_routes!( + { + setting: filterable_attributes, + update_verb: put, + default_value: [] + }, + { + setting: displayed_attributes, + update_verb: put, + default_value: ["*"] + }, + { + setting: localized_attributes, + update_verb: put, + default_value: null + }, + { + setting: searchable_attributes, + update_verb: put, + default_value: ["*"] + }, + { + setting: distinct_attribute, + update_verb: put, + default_value: null + }, + { + setting: stop_words, + update_verb: put, + default_value: [] + }, + { + setting: separator_tokens, + update_verb: put, + default_value: [] + }, + { + setting: non_separator_tokens, + update_verb: put, + default_value: [] + }, + { + setting: dictionary, + update_verb: put, + default_value: [] + }, + { + setting: ranking_rules, + update_verb: put, + default_value: ["words", "typo", "proximity", "attribute", "sort", "exactness"] + }, + { + setting: synonyms, + update_verb: put, + default_value: {} + }, + { + setting: pagination, + update_verb: patch, + default_value: {"maxTotalHits": 1000} + }, 
+ { + setting: faceting, + update_verb: patch, + default_value: {"maxValuesPerFacet": 100, "sortFacetValuesBy": {"*": "alpha"}} + }, + { + setting: search_cutoff_ms, + update_verb: put, + default_value: null + }, + { + setting: embedders, + update_verb: patch, + default_value: null + }, + { + setting: facet_search, + update_verb: put, + default_value: true + }, + { + setting: prefix_search, + update_verb: put, + default_value: "indexingTime" + }, + { + setting: proximity_precision, + update_verb: put, + default_value: "byWord" + }, + { + setting: sortable_attributes, + update_verb: put, + default_value: [] + }, + { + setting: typo_tolerance, + update_verb: patch, + default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": []} + }, +); + #[actix_rt::test] async fn get_settings_unexisting_index() { let server = Server::new().await; @@ -303,186 +483,6 @@ async fn error_update_setting_unexisting_index_invalid_uid() { "###); } -macro_rules! test_setting_routes { - ($({setting: $setting:ident, update_verb: $update_verb:ident, default_value: $default_value:tt},) *) => { - $( - mod $setting { - use crate::common::Server; - - #[actix_rt::test] - async fn get_unexisting_index() { - let server = Server::new().await; - let url = format!("/indexes/test/settings/{}", - stringify!($setting) - .chars() - .map(|c| if c == '_' { '-' } else { c }) - .collect::()); - let (_response, code) = server.service.get(url).await; - assert_eq!(code, 404); - } - - #[actix_rt::test] - async fn update_unexisting_index() { - let server = Server::new().await; - let url = format!("/indexes/test/settings/{}", - stringify!($setting) - .chars() - .map(|c| if c == '_' { '-' } else { c }) - .collect::()); - let (response, code) = server.service.$update_verb(url, serde_json::Value::Null.into()).await; - assert_eq!(code, 202, "{}", response); - server.index("").wait_task(0).await; - let (response, code) = server.index("test").get().await; - assert_eq!(code, 200, "{}", response); - } - - #[actix_rt::test] - async fn delete_unexisting_index() { - let server = Server::new().await; - let url = format!("/indexes/test/settings/{}", - stringify!($setting) - .chars() - .map(|c| if c == '_' { '-' } else { c }) - .collect::()); - let (_, code) = server.service.delete(url).await; - assert_eq!(code, 202); - let response = server.index("").wait_task(0).await; - assert_eq!(response["status"], "failed"); - } - - #[actix_rt::test] - async fn get_default() { - let server = Server::new().await; - let index = server.index("test"); - let (response, code) = index.create(None).await; - assert_eq!(code, 202, "{}", response); - index.wait_task(0).await; - let url = format!("/indexes/test/settings/{}", - stringify!($setting) - .chars() - .map(|c| if c == '_' { '-' } else { c }) - .collect::()); - let (response, code) = server.service.get(url).await; - assert_eq!(code, 200, "{}", response); - let expected = crate::json!($default_value); - assert_eq!(expected, response); - } - } - )* - - #[actix_rt::test] - async fn all_setting_tested() { - let expected = std::collections::BTreeSet::from_iter(meilisearch::routes::indexes::settings::ALL_SETTINGS_NAMES.iter()); - let tested = std::collections::BTreeSet::from_iter([$(stringify!($setting)),*].iter()); - let diff: Vec<_> = expected.difference(&tested).collect(); - assert!(diff.is_empty(), "Not all settings were tested, please add the following settings to the `test_setting_routes!` macro: {:?}", diff); - } - }; -} - -test_setting_routes!( - 
{ - setting: filterable_attributes, - update_verb: put, - default_value: [] - }, - { - setting: displayed_attributes, - update_verb: put, - default_value: ["*"] - }, - { - setting: localized_attributes, - update_verb: put, - default_value: null - }, - { - setting: searchable_attributes, - update_verb: put, - default_value: ["*"] - }, - { - setting: distinct_attribute, - update_verb: put, - default_value: null - }, - { - setting: stop_words, - update_verb: put, - default_value: [] - }, - { - setting: separator_tokens, - update_verb: put, - default_value: [] - }, - { - setting: non_separator_tokens, - update_verb: put, - default_value: [] - }, - { - setting: dictionary, - update_verb: put, - default_value: [] - }, - { - setting: ranking_rules, - update_verb: put, - default_value: ["words", "typo", "proximity", "attribute", "sort", "exactness"] - }, - { - setting: synonyms, - update_verb: put, - default_value: {} - }, - { - setting: pagination, - update_verb: patch, - default_value: {"maxTotalHits": 1000} - }, - { - setting: faceting, - update_verb: patch, - default_value: {"maxValuesPerFacet": 100, "sortFacetValuesBy": {"*": "alpha"}} - }, - { - setting: search_cutoff_ms, - update_verb: put, - default_value: null - }, - { - setting: embedders, - update_verb: patch, - default_value: null - }, - { - setting: facet_search, - update_verb: put, - default_value: true - }, - { - setting: prefix_search, - update_verb: put, - default_value: "indexingTime" - }, - { - setting: proximity_precision, - update_verb: put, - default_value: "byWord" - }, - { - setting: sortable_attributes, - update_verb: put, - default_value: [] - }, - { - setting: typo_tolerance, - update_verb: patch, - default_value: {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": []} - }, -); - #[actix_rt::test] async fn error_set_invalid_ranking_rules() { let server = Server::new().await; From b57dd5c58e2944bb607681a4adfcf0b05dd25b2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 28 Nov 2024 15:19:57 +0100 Subject: [PATCH 043/158] Remove the Vector variant and use the Vectors --- crates/milli/src/update/new/channel.rs | 126 +-------------------- crates/milli/src/update/new/indexer/mod.rs | 19 ---- 2 files changed, 4 insertions(+), 141 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 38f436837..102a27336 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -100,7 +100,6 @@ pub enum ReceiverAction { /// Wake up, you have frames to read for the BBQueue buffers. WakeUp, LargeEntry(LargeEntry), - LargeVector(LargeVector), LargeVectors(LargeVectors), } @@ -120,24 +119,6 @@ pub struct LargeEntry { pub value: Mmap, } -/// When an embedding is larger than the available -/// BBQueue space it arrives here. -#[derive(Debug)] -pub struct LargeVector { - /// The document id associated to the large embedding. - pub docid: DocumentId, - /// The embedder id in which to insert the large embedding. - pub embedder_id: u8, - /// The large embedding that must be written. - pub embedding: Mmap, -} - -impl LargeVector { - pub fn read_embedding(&self) -> &[f32] { - bytemuck::cast_slice(&self.embedding) - } -} - /// When embeddings are larger than the available /// BBQueue space it arrives here. 
#[derive(Debug)] @@ -225,35 +206,6 @@ pub struct ArroyDeleteVector { pub docid: DocumentId, } -#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] -#[repr(C)] -/// The embedding is the remaining space and represents a non-aligned [f32]. -pub struct ArroySetVector { - pub docid: DocumentId, - pub embedder_id: u8, - _padding: [u8; 3], -} - -impl ArroySetVector { - pub fn read_embedding_into_vec<'v>( - &self, - frame: &FrameGrantR<'_>, - vec: &'v mut Vec, - ) -> Option<&'v [f32]> { - vec.clear(); - let skip = EntryHeader::variant_size() + mem::size_of::(); - let bytes = &frame[skip..]; - if bytes.is_empty() { - return None; - } - bytes.chunks_exact(mem::size_of::()).for_each(|bytes| { - let f = bytes.try_into().map(f32::from_ne_bytes).unwrap(); - vec.push(f); - }); - Some(&vec[..]) - } -} - #[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] #[repr(C)] /// The embeddings are in the remaining space and represents @@ -290,7 +242,6 @@ impl ArroySetVectors { pub enum EntryHeader { DbOperation(DbOperation), ArroyDeleteVector(ArroyDeleteVector), - ArroySetVector(ArroySetVector), ArroySetVectors(ArroySetVectors), } @@ -303,8 +254,7 @@ impl EntryHeader { match self { EntryHeader::DbOperation(_) => 0, EntryHeader::ArroyDeleteVector(_) => 1, - EntryHeader::ArroySetVector(_) => 2, - EntryHeader::ArroySetVectors(_) => 3, + EntryHeader::ArroySetVectors(_) => 2, } } @@ -323,11 +273,6 @@ impl EntryHeader { Self::variant_size() + mem::size_of::() } - /// The `dimensions` corresponds to the number of `f32` in the embedding. - fn total_set_vector_size(dimensions: usize) -> usize { - Self::variant_size() + mem::size_of::() + dimensions * mem::size_of::() - } - /// The `dimensions` corresponds to the number of `f32` in the embedding. fn total_set_vectors_size(count: usize, dimensions: usize) -> usize { let embedding_size = dimensions * mem::size_of::(); @@ -338,7 +283,6 @@ impl EntryHeader { let payload_size = match self { EntryHeader::DbOperation(op) => mem::size_of_val(op), EntryHeader::ArroyDeleteVector(adv) => mem::size_of_val(adv), - EntryHeader::ArroySetVector(asv) => mem::size_of_val(asv), EntryHeader::ArroySetVectors(asvs) => mem::size_of_val(asvs), }; Self::variant_size() + payload_size @@ -358,11 +302,6 @@ impl EntryHeader { EntryHeader::ArroyDeleteVector(header) } 2 => { - let header_bytes = &remaining[..mem::size_of::()]; - let header = checked::pod_read_unaligned(header_bytes); - EntryHeader::ArroySetVector(header) - } - 3 => { let header_bytes = &remaining[..mem::size_of::()]; let header = checked::pod_read_unaligned(header_bytes); EntryHeader::ArroySetVectors(header) @@ -376,7 +315,6 @@ impl EntryHeader { let payload_bytes = match self { EntryHeader::DbOperation(op) => bytemuck::bytes_of(op), EntryHeader::ArroyDeleteVector(adv) => bytemuck::bytes_of(adv), - EntryHeader::ArroySetVector(asv) => bytemuck::bytes_of(asv), EntryHeader::ArroySetVectors(asvs) => bytemuck::bytes_of(asvs), }; *first = self.variant_id(); @@ -520,59 +458,6 @@ impl<'b> ExtractorBbqueueSender<'b> { Ok(()) } - fn set_vector( - &self, - docid: DocumentId, - embedder_id: u8, - embedding: &[f32], - ) -> crate::Result<()> { - let capacity = self.capacity; - let refcell = self.producers.get().unwrap(); - let mut producer = refcell.0.borrow_mut_or_yield(); - - let arroy_set_vector = ArroySetVector { docid, embedder_id, _padding: [0; 3] }; - let payload_header = EntryHeader::ArroySetVector(arroy_set_vector); - let total_length = EntryHeader::total_set_vector_size(embedding.len()); - if total_length > capacity { - let mut 
embedding_bytes = bytemuck::cast_slice(embedding); - let mut value_file = tempfile::tempfile().map(BufWriter::new)?; - io::copy(&mut embedding_bytes, &mut value_file)?; - let value_file = value_file.into_inner().map_err(|ie| ie.into_error())?; - value_file.sync_all()?; - let embedding = unsafe { Mmap::map(&value_file)? }; - - let large_vector = LargeVector { docid, embedder_id, embedding }; - self.sender.send(ReceiverAction::LargeVector(large_vector)).unwrap(); - - return Ok(()); - } - - // Spin loop to have a frame the size we requested. - let mut grant = loop { - match producer.grant(total_length) { - Ok(grant) => break grant, - Err(bbqueue::Error::InsufficientSize) => continue, - Err(e) => unreachable!("{e:?}"), - } - }; - - let header_size = payload_header.header_size(); - let (header_bytes, remaining) = grant.split_at_mut(header_size); - payload_header.serialize_into(header_bytes); - remaining.copy_from_slice(bytemuck::cast_slice(embedding)); - - // We could commit only the used memory. - grant.commit(total_length); - - // We only send a wake up message when the channel is empty - // so that we don't fill the channel with too many WakeUps. - if self.sender.is_empty() { - self.sender.send(ReceiverAction::WakeUp).unwrap(); - } - - Ok(()) - } - fn set_vectors( &self, docid: u32, @@ -583,12 +468,9 @@ impl<'b> ExtractorBbqueueSender<'b> { let refcell = self.producers.get().unwrap(); let mut producer = refcell.0.borrow_mut_or_yield(); - // If there are no vector we specify the dimensions + // If there are no vectors we specify the dimensions // to zero to allocate no extra space at all - let dimensions = match embeddings.first() { - Some(embedding) => embedding.len(), - None => 0, - }; + let dimensions = embeddings.first().map_or(0, |emb| emb.len()); let arroy_set_vector = ArroySetVectors { docid, embedder_id, _padding: [0; 3] }; let payload_header = EntryHeader::ArroySetVectors(arroy_set_vector); @@ -954,7 +836,7 @@ impl EmbeddingSender<'_, '_> { embedder_id: u8, embedding: Embedding, ) -> crate::Result<()> { - self.0.set_vector(docid, embedder_id, &embedding[..]) + self.0.set_vectors(docid, embedder_id, &[embedding]) } } diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 07cb9d69e..9a6b40efb 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -16,7 +16,6 @@ use rand::SeedableRng as _; use raw_collections::RawMap; use time::OffsetDateTime; pub use update_by_function::UpdateByFunction; -use {LargeEntry, LargeVector}; use super::channel::*; use super::extract::*; @@ -430,14 +429,6 @@ where })); } } - ReceiverAction::LargeVector(large_vector) => { - let embedding = large_vector.read_embedding(); - let LargeVector { docid, embedder_id, .. } = large_vector; - let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - writer.del_items(wtxn, *dimensions, docid)?; - writer.add_item(wtxn, docid, embedding)?; - } ReceiverAction::LargeVectors(large_vectors) => { let LargeVectors { docid, embedder_id, .. } = large_vectors; let (_, _, writer, dimensions) = @@ -594,16 +585,6 @@ fn write_from_bbqueue( writer.del_items(wtxn, dimensions, docid)?; } } - EntryHeader::ArroySetVector(asv) => { - let ArroySetVector { docid, embedder_id, .. 
} = asv; - let frame = frame_with_header.frame(); - let (_, _, writer, dimensions) = - arroy_writers.get(&embedder_id).expect("requested a missing embedder"); - writer.del_items(wtxn, *dimensions, docid)?; - if let Some(embedding) = asv.read_embedding_into_vec(frame, aligned_embedding) { - writer.add_item(wtxn, docid, embedding)?; - } - } EntryHeader::ArroySetVectors(asvs) => { let ArroySetVectors { docid, embedder_id, .. } = asvs; let frame = frame_with_header.frame(); From 3c7ac093d39a6fa08eaf5e34814ba967037e80ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 28 Nov 2024 15:43:14 +0100 Subject: [PATCH 044/158] Take the BBQueue capacity into account in the max memory --- crates/milli/src/update/new/channel.rs | 11 +++++++---- crates/milli/src/update/new/indexer/mod.rs | 23 ++++++++++++++-------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 102a27336..1a463be1e 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -27,8 +27,9 @@ use crate::{CboRoaringBitmapCodec, DocumentId, Index}; /// Creates a tuple of senders/receiver to be used by /// the extractors and the writer loop. /// -/// The `bbqueue_capacity` represent the number of bytes allocated -/// to each BBQueue buffer and is not the sum of all of them. +/// The `total_bbbuffer_capacity` represent the number of bytes +/// allocated to all BBQueue buffer. It will be split by the +/// number of thread. /// /// The `channel_capacity` parameter defines the number of /// too-large-to-fit-in-BBQueue entries that can be sent through @@ -46,10 +47,12 @@ use crate::{CboRoaringBitmapCodec, DocumentId, Index}; /// to the number of available threads in the rayon threadpool. pub fn extractor_writer_bbqueue( bbbuffers: &mut Vec, - bbbuffer_capacity: usize, + total_bbbuffer_capacity: usize, channel_capacity: usize, ) -> (ExtractorBbqueueSender, WriterBbqueueReceiver) { - bbbuffers.resize_with(rayon::current_num_threads(), || BBBuffer::new(bbbuffer_capacity)); + let current_num_threads = rayon::current_num_threads(); + let bbbuffer_capacity = total_bbbuffer_capacity.checked_div(current_num_threads).unwrap(); + bbbuffers.resize_with(current_num_threads, || BBBuffer::new(bbbuffer_capacity)); let capacity = bbbuffers.first().unwrap().capacity(); // Read the field description to understand this diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 9a6b40efb..99ee89701 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -79,15 +79,22 @@ where { let mut bbbuffers = Vec::new(); let finished_extraction = AtomicBool::new(false); + + // We compute and remove the allocated BBQueues buffers capacity from the indexing memory. 
+ let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or( + (grenad_parameters, 100 * 1024 * 1024 * pool.current_num_threads()), // 100 MiB by thread by default + |max_memory| { + let total_bbbuffer_capacity = max_memory / 10; // 10% of the indexing memory + let new_grenad_parameters = GrenadParameters { + max_memory: Some(max_memory - total_bbbuffer_capacity), + ..grenad_parameters + }; + (new_grenad_parameters, total_bbbuffer_capacity) + }, + ); + let (extractor_sender, mut writer_receiver) = pool - .install(|| { - /// TODO restrict memory and remove this memory from the extractors bump allocators - extractor_writer_bbqueue( - &mut bbbuffers, - 100 * 1024 * 1024, // 100 MiB - 1000, - ) - }) + .install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000)) .unwrap(); let metadata_builder = MetadataBuilder::from_index(index, wtxn)?; From 8a35cd1743ec4ce9e8b872bbd9bb0ede4aaad35d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 28 Nov 2024 16:00:15 +0100 Subject: [PATCH 045/158] Adjust the BBQueue buffers to use 2% instead of 10% --- crates/milli/src/update/new/indexer/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 99ee89701..19f1bca3e 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -84,7 +84,7 @@ where let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or( (grenad_parameters, 100 * 1024 * 1024 * pool.current_num_threads()), // 100 MiB by thread by default |max_memory| { - let total_bbbuffer_capacity = max_memory / 10; // 10% of the indexing memory + let total_bbbuffer_capacity = max_memory / (100 / 2); // 2% of the indexing memory let new_grenad_parameters = GrenadParameters { max_memory: Some(max_memory - total_bbbuffer_capacity), ..grenad_parameters From 14ee7aa84c7fc82e6475f551b1fc9d2b4f8aaff2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 28 Nov 2024 18:02:48 +0100 Subject: [PATCH 046/158] Make sure the BBQueue is at least 50 MiB --- crates/milli/src/update/new/indexer/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 19f1bca3e..e0450ff7d 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -81,10 +81,12 @@ where let finished_extraction = AtomicBool::new(false); // We compute and remove the allocated BBQueues buffers capacity from the indexing memory. 
+ let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or( - (grenad_parameters, 100 * 1024 * 1024 * pool.current_num_threads()), // 100 MiB by thread by default + (grenad_parameters, 2 * minimum_capacity), // 100 MiB by thread by default |max_memory| { - let total_bbbuffer_capacity = max_memory / (100 / 2); // 2% of the indexing memory + // 2% of the indexing memory + let total_bbbuffer_capacity = (max_memory / 100 / 2).min(minimum_capacity); let new_grenad_parameters = GrenadParameters { max_memory: Some(max_memory - total_bbbuffer_capacity), ..grenad_parameters From 13f21206a64de13202cec3c2841a8c3654b6899a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:03:01 +0100 Subject: [PATCH 047/158] Call the serialize_into_writer method from the serialize_into one --- .../roaring_bitmap/cbo_roaring_bitmap_codec.rs | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index cae1874dd..20a246dcd 100644 --- a/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -27,18 +27,8 @@ impl CboRoaringBitmapCodec { } } - pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec) { - if roaring.len() <= THRESHOLD as u64 { - // If the number of items (u32s) to encode is less than or equal to the threshold - // it means that it would weigh the same or less than the RoaringBitmap - // header, so we directly encode them using ByteOrder instead. - for integer in roaring { - vec.write_u32::(integer).unwrap(); - } - } else { - // Otherwise, we use the classic RoaringBitmapCodec that writes a header. 
- roaring.serialize_into(vec).unwrap(); - } + pub fn serialize_into_vec(roaring: &RoaringBitmap, vec: &mut Vec) { + Self::serialize_into_writer(roaring, vec).unwrap() } pub fn serialize_into_writer( From db4eaf4d2de4140fde57ddfd71af80f8a4ed4826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:03:27 +0100 Subject: [PATCH 048/158] Rename serialize_into into serialize_into_writer --- crates/milli/src/heed_codec/facet/mod.rs | 2 +- .../heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs | 4 ++-- crates/milli/src/update/new/extract/cache.rs | 8 ++++---- crates/milli/src/update/new/words_prefix_docids.rs | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/milli/src/heed_codec/facet/mod.rs b/crates/milli/src/heed_codec/facet/mod.rs index a8bb5055e..c0870c9fd 100644 --- a/crates/milli/src/heed_codec/facet/mod.rs +++ b/crates/milli/src/heed_codec/facet/mod.rs @@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { fn bytes_encode(value: &'a Self::EItem) -> Result, BoxedError> { let mut v = vec![value.size]; - CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v); + CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v); Ok(Cow::Owned(v)) } } diff --git a/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index 20a246dcd..0ab162880 100644 --- a/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -152,7 +152,7 @@ impl CboRoaringBitmapCodec { return Ok(None); } - Self::serialize_into(&previous, buffer); + Self::serialize_into_vec(&previous, buffer); Ok(Some(&buffer[..])) } } @@ -178,7 +178,7 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { let mut vec = Vec::with_capacity(Self::serialized_size(item)); - Self::serialize_into(item, &mut vec); + Self::serialize_into_vec(item, &mut vec); Ok(Cow::Owned(vec)) } } diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index 26ed0eb44..be077d142 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -415,21 +415,21 @@ fn spill_entry_to_sorter( match deladd { DelAddRoaringBitmap { del: Some(del), add: None } => { cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer); + CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer); value_writer.insert(DelAdd::Deletion, &cbo_buffer)?; } DelAddRoaringBitmap { del: None, add: Some(add) } => { cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer); + CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer); value_writer.insert(DelAdd::Addition, &cbo_buffer)?; } DelAddRoaringBitmap { del: Some(del), add: Some(add) } => { cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer); + CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer); value_writer.insert(DelAdd::Deletion, &cbo_buffer)?; cbo_buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer); + CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer); value_writer.insert(DelAdd::Addition, &cbo_buffer)?; } DelAddRoaringBitmap { del: None, add: None } => return Ok(()), diff --git a/crates/milli/src/update/new/words_prefix_docids.rs b/crates/milli/src/update/new/words_prefix_docids.rs index 
338d22505..7e56beeae 100644 --- a/crates/milli/src/update/new/words_prefix_docids.rs +++ b/crates/milli/src/update/new/words_prefix_docids.rs @@ -76,7 +76,7 @@ impl WordPrefixDocids { .union()?; buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&output, buffer); + CboRoaringBitmapCodec::serialize_into_vec(&output, buffer); index.push(PrefixEntry { prefix, serialized_length: buffer.len() }); file.write_all(buffer) })?; @@ -211,7 +211,7 @@ impl WordPrefixIntegerDocids { .union()?; buffer.clear(); - CboRoaringBitmapCodec::serialize_into(&output, buffer); + CboRoaringBitmapCodec::serialize_into_vec(&output, buffer); index.push(PrefixIntegerEntry { prefix, pos, serialized_length: buffer.len() }); file.write_all(buffer)?; } From 76d0623b11b88c169843bbc61c1b8bff132e9d4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:05:06 +0100 Subject: [PATCH 049/158] Reduce the number of unwraps --- crates/milli/src/update/new/merger.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index f8af84177..b650b6b53 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -56,7 +56,7 @@ where let rtree_mmap = unsafe { Mmap::map(&file)? }; geo_sender.set_rtree(rtree_mmap).unwrap(); - geo_sender.set_geo_faceted(&faceted).unwrap(); + geo_sender.set_geo_faceted(&faceted)?; Ok(()) } @@ -82,11 +82,11 @@ where let current = database.get(&rtxn, key)?; match merge_cbo_bitmaps(current, del, add)? { Operation::Write(bitmap) => { - docids_sender.write(key, &bitmap).unwrap(); + docids_sender.write(key, &bitmap)?; Ok(()) } Operation::Delete => { - docids_sender.delete(key).unwrap(); + docids_sender.delete(key)?; Ok(()) } Operation::Ignore => Ok(()), @@ -112,12 +112,12 @@ pub fn merge_and_send_facet_docids<'extractor>( match merge_cbo_bitmaps(current, del, add)? { Operation::Write(bitmap) => { facet_field_ids_delta.register_from_key(key); - docids_sender.write(key, &bitmap).unwrap(); + docids_sender.write(key, &bitmap)?; Ok(()) } Operation::Delete => { facet_field_ids_delta.register_from_key(key); - docids_sender.delete(key).unwrap(); + docids_sender.delete(key)?; Ok(()) } Operation::Ignore => Ok(()), From 5b860cb9893ded811150f9ae0332dc89f166ea6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:06:35 +0100 Subject: [PATCH 050/158] Fix english in the doc --- crates/milli/src/update/new/channel.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 1a463be1e..7375354aa 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -27,9 +27,9 @@ use crate::{CboRoaringBitmapCodec, DocumentId, Index}; /// Creates a tuple of senders/receiver to be used by /// the extractors and the writer loop. /// -/// The `total_bbbuffer_capacity` represent the number of bytes -/// allocated to all BBQueue buffer. It will be split by the -/// number of thread. +/// The `total_bbbuffer_capacity` represents the number of bytes +/// allocated to all BBQueue buffers. It will be split by the +/// number of threads. /// /// The `channel_capacity` parameter defines the number of /// too-large-to-fit-in-BBQueue entries that can be sent through @@ -37,14 +37,9 @@ use crate::{CboRoaringBitmapCodec, DocumentId, Index}; /// sure we do not use too much memory. 
/// /// Note that the channel is also used to wake-up the receiver -/// wehn new stuff is available in any BBQueue buffer but we send +/// when new stuff is available in any BBQueue buffer but we send /// a message in this queue only if it is empty to avoid filling /// the channel *and* the BBQueue. -/// -/// # Safety -/// -/// Panics if the number of provided BBQueues is not exactly equal -/// to the number of available threads in the rayon threadpool. pub fn extractor_writer_bbqueue( bbbuffers: &mut Vec, total_bbbuffer_capacity: usize, @@ -82,7 +77,7 @@ pub struct ExtractorBbqueueSender<'a> { /// The capacity of this frame producer, will never be able to store more than that. /// /// Note that the FrameProducer requires up to 9 bytes to encode the length, - /// the capacity has been shrinked accordingly. + /// the capacity has been shrunk accordingly. /// /// capacity: usize, From 30eb0e5b5baad02475a73c5ae16f3a1713bd21a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:08:01 +0100 Subject: [PATCH 051/158] Rename recv and read methods to recv_action and recv_frame --- crates/milli/src/update/new/channel.rs | 4 ++-- crates/milli/src/update/new/indexer/mod.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 7375354aa..82e483d18 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -136,11 +136,11 @@ impl LargeVectors { } impl<'a> WriterBbqueueReceiver<'a> { - pub fn recv(&mut self) -> Option { + pub fn recv_action(&mut self) -> Option { self.receiver.recv().ok() } - pub fn read(&mut self) -> Option> { + pub fn recv_frame(&mut self) -> Option> { for consumer in &mut self.consumers { if let Some(frame) = consumer.read() { return Some(FrameWithHeader::from(frame)); diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index e0450ff7d..bd3fedae2 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -417,7 +417,7 @@ where let span = tracing::trace_span!(target: "indexing::write_db", "post_merge"); let mut _entered_post_merge = None; - while let Some(action) = writer_receiver.recv() { + while let Some(action) = writer_receiver.recv_action() { if _entered_post_merge.is_none() && finished_extraction.load(std::sync::atomic::Ordering::Relaxed) { @@ -556,7 +556,7 @@ fn write_from_bbqueue( arroy_writers: &HashMap, aligned_embedding: &mut Vec, ) -> crate::Result<()> { - while let Some(frame_with_header) = writer_receiver.read() { + while let Some(frame_with_header) = writer_receiver.recv_frame() { match frame_with_header.header() { EntryHeader::DbOperation(operation) => { let database_name = operation.database.database_name(); From 5df5eb2db26159f79a0cedaea575bb9a79e098c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:10:48 +0100 Subject: [PATCH 052/158] Clarify a method name --- crates/milli/src/update/new/channel.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 82e483d18..7b083341b 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -215,7 +215,7 @@ pub struct ArroySetVectors { } impl ArroySetVectors { - fn remaining_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] { + fn embeddings_bytes<'a>(frame: &'a 
FrameGrantR<'_>) -> &'a [u8] { let skip = EntryHeader::variant_size() + mem::size_of::(); &frame[skip..] } @@ -227,7 +227,7 @@ impl ArroySetVectors { vec: &'v mut Vec, ) -> &'v [f32] { vec.clear(); - Self::remaining_bytes(frame).chunks_exact(mem::size_of::()).for_each(|bytes| { + Self::embeddings_bytes(frame).chunks_exact(mem::size_of::()).for_each(|bytes| { let f = bytes.try_into().map(f32::from_ne_bytes).unwrap(); vec.push(f); }); From f7f9a131e400bc995d7ef152e559b1e70ecd85e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:15:58 +0100 Subject: [PATCH 053/158] Improve copying bytes into aligned memory area --- crates/milli/src/update/new/channel.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 7b083341b..7a997c3af 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -226,11 +226,10 @@ impl ArroySetVectors { frame: &FrameGrantR<'_>, vec: &'v mut Vec, ) -> &'v [f32] { - vec.clear(); - Self::embeddings_bytes(frame).chunks_exact(mem::size_of::()).for_each(|bytes| { - let f = bytes.try_into().map(f32::from_ne_bytes).unwrap(); - vec.push(f); - }); + let embeddings_bytes = Self::embeddings_bytes(frame); + let embeddings_count = embeddings_bytes.len() / mem::size_of::(); + vec.resize(embeddings_count, 0.0); + bytemuck::cast_slice_mut(vec.as_mut()).copy_from_slice(embeddings_bytes); &vec[..] } } From be7d2fbe63070066538a6450d5e46803990169b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:19:11 +0100 Subject: [PATCH 054/158] Move the EntryHeader up in the file and document the safety related to the size --- crates/milli/src/update/new/channel.rs | 128 +++++++++++++------------ 1 file changed, 66 insertions(+), 62 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 7a997c3af..bebaad686 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -172,68 +172,10 @@ impl<'a> From> for FrameWithHeader<'a> { } } -#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] -#[repr(C)] -/// Wether a put of the key/value pair or a delete of the given key. -pub struct DbOperation { - /// The database on which to perform the operation. - pub database: Database, - /// The key length in the buffer. - /// - /// If None it means that the buffer is dedicated - /// to the key and it is therefore a deletion operation. - pub key_length: Option, -} - -impl DbOperation { - pub fn key_value<'a>(&self, frame: &'a FrameGrantR<'_>) -> (&'a [u8], Option<&'a [u8]>) { - let skip = EntryHeader::variant_size() + mem::size_of::(); - match self.key_length { - Some(key_length) => { - let (key, value) = frame[skip..].split_at(key_length.get() as usize); - (key, Some(value)) - } - None => (&frame[skip..], None), - } - } -} - -#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] -#[repr(transparent)] -pub struct ArroyDeleteVector { - pub docid: DocumentId, -} - -#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] -#[repr(C)] -/// The embeddings are in the remaining space and represents -/// non-aligned [f32] each with dimensions f32s. 
-pub struct ArroySetVectors { - pub docid: DocumentId, - pub embedder_id: u8, - _padding: [u8; 3], -} - -impl ArroySetVectors { - fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] { - let skip = EntryHeader::variant_size() + mem::size_of::(); - &frame[skip..] - } - - /// Read all the embeddings and write them into an aligned `f32` Vec. - pub fn read_all_embeddings_into_vec<'v>( - &self, - frame: &FrameGrantR<'_>, - vec: &'v mut Vec, - ) -> &'v [f32] { - let embeddings_bytes = Self::embeddings_bytes(frame); - let embeddings_count = embeddings_bytes.len() / mem::size_of::(); - vec.resize(embeddings_count, 0.0); - bytemuck::cast_slice_mut(vec.as_mut()).copy_from_slice(embeddings_bytes); - &vec[..] - } -} - +/// A header that is written at the beginning of a bbqueue frame. +/// +/// Note that the different variants cannot be changed without taking +/// care of their size in the implementation, like, everywhere. #[derive(Debug, Clone, Copy)] #[repr(u8)] pub enum EntryHeader { @@ -319,6 +261,68 @@ impl EntryHeader { } } +#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] +#[repr(C)] +/// Wether a put of the key/value pair or a delete of the given key. +pub struct DbOperation { + /// The database on which to perform the operation. + pub database: Database, + /// The key length in the buffer. + /// + /// If None it means that the buffer is dedicated + /// to the key and it is therefore a deletion operation. + pub key_length: Option, +} + +impl DbOperation { + pub fn key_value<'a>(&self, frame: &'a FrameGrantR<'_>) -> (&'a [u8], Option<&'a [u8]>) { + let skip = EntryHeader::variant_size() + mem::size_of::(); + match self.key_length { + Some(key_length) => { + let (key, value) = frame[skip..].split_at(key_length.get() as usize); + (key, Some(value)) + } + None => (&frame[skip..], None), + } + } +} + +#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] +#[repr(transparent)] +pub struct ArroyDeleteVector { + pub docid: DocumentId, +} + +#[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] +#[repr(C)] +/// The embeddings are in the remaining space and represents +/// non-aligned [f32] each with dimensions f32s. +pub struct ArroySetVectors { + pub docid: DocumentId, + pub embedder_id: u8, + _padding: [u8; 3], +} + +impl ArroySetVectors { + fn embeddings_bytes<'a>(frame: &'a FrameGrantR<'_>) -> &'a [u8] { + let skip = EntryHeader::variant_size() + mem::size_of::(); + &frame[skip..] + } + + /// Read all the embeddings and write them into an aligned `f32` Vec. + pub fn read_all_embeddings_into_vec<'v>( + &self, + frame: &FrameGrantR<'_>, + vec: &'v mut Vec, + ) -> &'v [f32] { + let embeddings_bytes = Self::embeddings_bytes(frame); + let embeddings_count = embeddings_bytes.len() / mem::size_of::(); + vec.resize(embeddings_count, 0.0); + bytemuck::cast_slice_mut(vec.as_mut()).copy_from_slice(embeddings_bytes); + &vec[..] 
+ } +} + #[derive(Debug, Clone, Copy, NoUninit, CheckedBitPattern)] #[repr(u16)] pub enum Database { From 263c5a348ee321559b8b98789d70be9950d6ec83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:33:49 +0100 Subject: [PATCH 055/158] Move the spin looping for BBQueue frames into a dedicated function --- Cargo.lock | 13 +++++ crates/milli/Cargo.toml | 1 + crates/milli/src/update/new/channel.rs | 79 ++++++++++++-------------- 3 files changed, 49 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8a0a6b3d0..038b269ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1910,6 +1910,15 @@ dependencies = [ "serde_json", ] +[[package]] +name = "flume" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" +dependencies = [ + "spin", +] + [[package]] name = "fnv" version = "1.0.7" @@ -3623,6 +3632,7 @@ dependencies = [ "enum-iterator", "filter-parser", "flatten-serde-json", + "flume", "fst", "fxhash", "geoutils", @@ -5180,6 +5190,9 @@ name = "spin" version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +dependencies = [ + "lock_api", +] [[package]] name = "spm_precompiled" diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index b66dec9a4..a88401470 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -99,6 +99,7 @@ rustc-hash = "2.0.0" uell = "0.1.0" enum-iterator = "2.1.0" bbqueue = { git = "https://github.com/kerollmops/bbqueue" } +flume = { version = "0.11.1", default-features = false } [dev-dependencies] mimalloc = { version = "0.1.43", default-features = false } diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index bebaad686..e8bb6930c 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -4,10 +4,10 @@ use std::marker::PhantomData; use std::mem; use std::num::NonZeroU16; -use bbqueue::framed::{FrameGrantR, FrameProducer}; +use bbqueue::framed::{FrameGrantR, FrameGrantW, FrameProducer}; use bbqueue::BBBuffer; use bytemuck::{checked, CheckedBitPattern, NoUninit}; -use crossbeam_channel::SendError; +use flume::SendError; use heed::types::Bytes; use heed::BytesDecode; use memmap2::{Mmap, MmapMut}; @@ -33,7 +33,7 @@ use crate::{CboRoaringBitmapCodec, DocumentId, Index}; /// /// The `channel_capacity` parameter defines the number of /// too-large-to-fit-in-BBQueue entries that can be sent through -/// a crossbeam channel. This parameter must stay low to make +/// a flume channel. This parameter must stay low to make /// sure we do not use too much memory. /// /// Note that the channel is also used to wake-up the receiver @@ -61,7 +61,7 @@ pub fn extractor_writer_bbqueue( consumer }); - let (sender, receiver) = crossbeam_channel::bounded(channel_capacity); + let (sender, receiver) = flume::bounded(channel_capacity); let sender = ExtractorBbqueueSender { sender, producers, capacity }; let receiver = WriterBbqueueReceiver { receiver, consumers }; (sender, receiver) @@ -70,7 +70,7 @@ pub fn extractor_writer_bbqueue( pub struct ExtractorBbqueueSender<'a> { /// This channel is used to wake-up the receiver and /// send large entries that cannot fit in the BBQueue. 
- sender: crossbeam_channel::Sender, + sender: flume::Sender, /// A memory buffer, one by thread, is used to serialize /// the entries directly in this shared, lock-free space. producers: ThreadLocal>>>, @@ -87,7 +87,7 @@ pub struct WriterBbqueueReceiver<'a> { /// Used to wake up when new entries are available either in /// any BBQueue buffer or directly sent throught this channel /// (still written to disk). - receiver: crossbeam_channel::Receiver, + receiver: flume::Receiver, /// The BBQueue frames to read when waking-up. consumers: Vec>, } @@ -437,19 +437,9 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = loop { - match producer.grant(total_length) { - Ok(grant) => break grant, - Err(bbqueue::Error::InsufficientSize) => continue, - Err(e) => unreachable!("{e:?}"), - } - }; - + let mut grant = reserve_grant(&mut producer, total_length, &self.sender); payload_header.serialize_into(&mut grant); - // We could commit only the used memory. - grant.commit(total_length); - // We only send a wake up message when the channel is empty // so that we don't fill the channel with too many WakeUps. if self.sender.is_empty() { @@ -494,13 +484,7 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = loop { - match producer.grant(total_length) { - Ok(grant) => break grant, - Err(bbqueue::Error::InsufficientSize) => continue, - Err(e) => unreachable!("{e:?}"), - } - }; + let mut grant = reserve_grant(&mut producer, total_length, &self.sender); let header_size = payload_header.header_size(); let (header_bytes, remaining) = grant.split_at_mut(header_size); @@ -571,13 +555,7 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = loop { - match producer.grant(total_length) { - Ok(grant) => break grant, - Err(bbqueue::Error::InsufficientSize) => continue, - Err(e) => unreachable!("{e:?}"), - } - }; + let mut grant = reserve_grant(&mut producer, total_length, &self.sender); let header_size = payload_header.header_size(); let (header_bytes, remaining) = grant.split_at_mut(header_size); @@ -585,9 +563,6 @@ impl<'b> ExtractorBbqueueSender<'b> { let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize); key_value_writer(key_buffer, value_buffer)?; - // We could commit only the used memory. - grant.commit(total_length); - // We only send a wake up message when the channel is empty // so that we don't fill the channel with too many WakeUps. if self.sender.is_empty() { @@ -628,22 +603,13 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = loop { - match producer.grant(total_length) { - Ok(grant) => break grant, - Err(bbqueue::Error::InsufficientSize) => continue, - Err(e) => unreachable!("{e:?}"), - } - }; + let mut grant = reserve_grant(&mut producer, total_length, &self.sender); let header_size = payload_header.header_size(); let (header_bytes, remaining) = grant.split_at_mut(header_size); payload_header.serialize_into(header_bytes); key_writer(remaining)?; - // We could commit only the used memory. - grant.commit(total_length); - // We only send a wake up message when the channel is empty // so that we don't fill the channel with too many WakeUps. 
if self.sender.is_empty() { @@ -654,6 +620,31 @@ impl<'b> ExtractorBbqueueSender<'b> { } } +/// Try to reserve a frame grant of `total_length` by spin looping +/// on the BBQueue buffer and panics if the receiver has been disconnected. +fn reserve_grant<'b>( + producer: &mut FrameProducer<'b>, + total_length: usize, + sender: &flume::Sender, +) -> FrameGrantW<'b> { + loop { + for _ in 0..10_000 { + match producer.grant(total_length) { + Ok(mut grant) => { + // We could commit only the used memory. + grant.to_commit(total_length); + return grant; + } + Err(bbqueue::Error::InsufficientSize) => continue, + Err(e) => unreachable!("{e:?}"), + } + } + if sender.is_disconnected() { + panic!("channel is disconnected"); + } + } +} + pub enum ExactWordDocids {} pub enum FidWordCountDocids {} pub enum WordDocids {} From bcab61ab1d83738710a69766bf4c3723b1596906 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:42:47 +0100 Subject: [PATCH 056/158] Do spurious wake ups on the receiver side --- crates/milli/src/update/new/channel.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index e8bb6930c..631fcf74e 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -3,11 +3,12 @@ use std::io::{self, BufWriter}; use std::marker::PhantomData; use std::mem; use std::num::NonZeroU16; +use std::time::Duration; use bbqueue::framed::{FrameGrantR, FrameGrantW, FrameProducer}; use bbqueue::BBBuffer; use bytemuck::{checked, CheckedBitPattern, NoUninit}; -use flume::SendError; +use flume::{RecvTimeoutError, SendError}; use heed::types::Bytes; use heed::BytesDecode; use memmap2::{Mmap, MmapMut}; @@ -136,10 +137,24 @@ impl LargeVectors { } impl<'a> WriterBbqueueReceiver<'a> { + /// Tries to receive an action to do until the timeout occurs + /// and if it does, consider it as a spurious wake up. pub fn recv_action(&mut self) -> Option { - self.receiver.recv().ok() + match self.receiver.recv_timeout(Duration::from_millis(100)) { + Ok(action) => Some(action), + Err(RecvTimeoutError::Timeout) => Some(ReceiverAction::WakeUp), + Err(RecvTimeoutError::Disconnected) => None, + } } + /// Reads all the BBQueue buffers and selects the first available frame. + /// + /// Note: Selecting the first available frame gives preference to + /// frames that will be cleaned up first. It may result in the + /// last frames being more likely to fill up. One potential optimization + /// could involve keeping track of the last processed BBQueue index + /// to cycle through the frames instead of always starting from the + /// beginning. 
pub fn recv_frame(&mut self) -> Option> { for consumer in &mut self.consumers { if let Some(frame) = consumer.read() { From 5e218f3f4daf1594580eb377183770fd4a206a5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 10:44:42 +0100 Subject: [PATCH 057/158] Remove a sync_all (mark my words) --- crates/milli/src/update/new/channel.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 631fcf74e..219f20854 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -489,7 +489,6 @@ impl<'b> ExtractorBbqueueSender<'b> { } let value_file = value_file.into_inner().map_err(|ie| ie.into_error())?; - value_file.sync_all()?; let embeddings = unsafe { Mmap::map(&value_file)? }; let large_vectors = LargeVectors { docid, embedder_id, embeddings }; From d5c07ef7b310f8af30a6d5ac0ea2b0da93241709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 11:02:49 +0100 Subject: [PATCH 058/158] Manage key length conversion error correctly --- crates/milli/src/error.rs | 10 ++-- crates/milli/src/update/new/channel.rs | 53 ++++++++++++++++++---- crates/milli/src/update/new/indexer/mod.rs | 2 +- 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 800dfa375..a6774a7bd 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -3,6 +3,7 @@ use std::convert::Infallible; use std::fmt::Write; use std::{io, str}; +use bstr::BString; use heed::{Error as HeedError, MdbError}; use rayon::ThreadPoolBuildError; use rhai::EvalAltResult; @@ -62,14 +63,9 @@ pub enum InternalError { #[error(transparent)] Store(#[from] MdbError), #[error("Cannot delete {key:?} from database {database_name}: {error}")] - StoreDeletion { database_name: &'static str, key: Box<[u8]>, error: heed::Error }, + StoreDeletion { database_name: &'static str, key: BString, error: heed::Error }, #[error("Cannot insert {key:?} and value with length {value_length} into database {database_name}: {error}")] - StorePut { - database_name: &'static str, - key: Box<[u8]>, - value_length: usize, - error: heed::Error, - }, + StorePut { database_name: &'static str, key: BString, value_length: usize, error: heed::Error }, #[error(transparent)] Utf8(#[from] str::Utf8Error), #[error("An indexation process was explicitly aborted")] diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 219f20854..b0a61bd7f 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -10,7 +10,7 @@ use bbqueue::BBBuffer; use bytemuck::{checked, CheckedBitPattern, NoUninit}; use flume::{RecvTimeoutError, SendError}; use heed::types::Bytes; -use heed::BytesDecode; +use heed::{BytesDecode, MdbError}; use memmap2::{Mmap, MmapMut}; use roaring::RoaringBitmap; @@ -23,7 +23,7 @@ use crate::index::db_name; use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY}; use crate::update::new::KvReaderFieldId; use crate::vector::Embedding; -use crate::{CboRoaringBitmapCodec, DocumentId, Index}; +use crate::{CboRoaringBitmapCodec, DocumentId, Index, InternalError}; /// Creates a tuple of senders/receiver to be used by /// the extractors and the writer loop. 
@@ -524,7 +524,14 @@ impl<'b> ExtractorBbqueueSender<'b> { } fn write_key_value(&self, database: Database, key: &[u8], value: &[u8]) -> crate::Result<()> { - let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); + let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| { + InternalError::StorePut { + database_name: database.database_name(), + key: key.into(), + value_length: value.len(), + error: MdbError::BadValSize.into(), + } + })?; self.write_key_value_with(database, key_length, value.len(), |key_buffer, value_buffer| { key_buffer.copy_from_slice(key); value_buffer.copy_from_slice(value); @@ -587,7 +594,13 @@ impl<'b> ExtractorBbqueueSender<'b> { } fn delete_entry(&self, database: Database, key: &[u8]) -> crate::Result<()> { - let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); + let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| { + InternalError::StoreDeletion { + database_name: database.database_name(), + key: key.into(), + error: MdbError::BadValSize.into(), + } + })?; self.delete_entry_with(database, key_length, |buffer| { buffer.copy_from_slice(key); Ok(()) @@ -702,8 +715,15 @@ pub struct WordDocidsSender<'a, 'b, D> { impl WordDocidsSender<'_, '_, D> { pub fn write(&self, key: &[u8], bitmap: &RoaringBitmap) -> crate::Result<()> { - let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); let value_length = CboRoaringBitmapCodec::serialized_size(bitmap); + let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| { + InternalError::StorePut { + database_name: D::DATABASE.database_name(), + key: key.into(), + value_length, + error: MdbError::BadValSize.into(), + } + })?; self.sender.write_key_value_with( D::DATABASE, key_length, @@ -731,7 +751,6 @@ impl FacetDocidsSender<'_, '_> { let (facet_kind, key) = FacetKind::extract_from_key(key); let database = Database::from(facet_kind); - let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); let value_length = CboRoaringBitmapCodec::serialized_size(bitmap); let value_length = match facet_kind { // We must take the facet group size into account @@ -739,6 +758,14 @@ impl FacetDocidsSender<'_, '_> { FacetKind::Number | FacetKind::String => value_length + 1, FacetKind::Null | FacetKind::Empty | FacetKind::Exists => value_length, }; + let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| { + InternalError::StorePut { + database_name: database.database_name(), + key: key.into(), + value_length, + error: MdbError::BadValSize.into(), + } + })?; self.sender.write_key_value_with( database, @@ -862,12 +889,20 @@ impl GeoSender<'_, '_> { } pub fn set_geo_faceted(&self, bitmap: &RoaringBitmap) -> crate::Result<()> { - let key = GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(); - let key_length = NonZeroU16::new(key.len().try_into().unwrap()).unwrap(); + let database = Database::Main; let value_length = bitmap.serialized_size(); + let key = GEO_FACETED_DOCUMENTS_IDS_KEY.as_bytes(); + let key_length = key.len().try_into().ok().and_then(NonZeroU16::new).ok_or_else(|| { + InternalError::StorePut { + database_name: database.database_name(), + key: key.into(), + value_length, + error: MdbError::BadValSize.into(), + } + })?; self.0.write_key_value_with( - Database::Main, + database, key_length, value_length, |key_buffer, value_buffer| { diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index bd3fedae2..7262c65cb 100644 --- 
a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -432,7 +432,7 @@ where if let Err(error) = database.put(wtxn, &key, &value) { return Err(Error::InternalError(InternalError::StorePut { database_name, - key, + key: bstr::BString::from(&key[..]), value_length: value.len(), error, })); From e9f34fb4b1d8ec674818218009055f21cb2e68e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 11:49:01 +0100 Subject: [PATCH 059/158] Make the frame consumer pulling fair --- crates/milli/src/update/new/channel.rs | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index b0a61bd7f..a2f16983e 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -1,8 +1,10 @@ use std::cell::RefCell; use std::io::{self, BufWriter}; +use std::iter::Cycle; use std::marker::PhantomData; use std::mem; use std::num::NonZeroU16; +use std::ops::Range; use std::time::Duration; use bbqueue::framed::{FrameGrantR, FrameGrantW, FrameProducer}; @@ -64,7 +66,11 @@ pub fn extractor_writer_bbqueue( let (sender, receiver) = flume::bounded(channel_capacity); let sender = ExtractorBbqueueSender { sender, producers, capacity }; - let receiver = WriterBbqueueReceiver { receiver, consumers }; + let receiver = WriterBbqueueReceiver { + receiver, + look_at_consumer: (0..consumers.len()).cycle(), + consumers, + }; (sender, receiver) } @@ -89,6 +95,9 @@ pub struct WriterBbqueueReceiver<'a> { /// any BBQueue buffer or directly sent throught this channel /// (still written to disk). receiver: flume::Receiver, + /// Indicates the consumer to observe. This cycling range + /// ensures fair distribution of work among consumers. + look_at_consumer: Cycle>, /// The BBQueue frames to read when waking-up. consumers: Vec>, } @@ -148,16 +157,9 @@ impl<'a> WriterBbqueueReceiver<'a> { } /// Reads all the BBQueue buffers and selects the first available frame. - /// - /// Note: Selecting the first available frame gives preference to - /// frames that will be cleaned up first. It may result in the - /// last frames being more likely to fill up. One potential optimization - /// could involve keeping track of the last processed BBQueue index - /// to cycle through the frames instead of always starting from the - /// beginning. pub fn recv_frame(&mut self) -> Option> { - for consumer in &mut self.consumers { - if let Some(frame) = consumer.read() { + for index in self.look_at_consumer.by_ref().take(self.consumers.len()) { + if let Some(frame) = self.consumers[index].read() { return Some(FrameWithHeader::from(frame)); } } @@ -511,9 +513,6 @@ impl<'b> ExtractorBbqueueSender<'b> { } } - // We could commit only the used memory. - grant.commit(total_length); - // We only send a wake up message when the channel is empty // so that we don't fill the channel with too many WakeUps. 
if self.sender.is_empty() { From 767259be7e5e7c8a69a802ddae9a434e349849e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 2 Dec 2024 11:53:42 +0100 Subject: [PATCH 060/158] Prefer returning a abort indexation rather than throwing a panic --- crates/milli/src/update/new/channel.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index a2f16983e..b749eb7fe 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -25,7 +25,7 @@ use crate::index::db_name; use crate::index::main_key::{GEO_FACETED_DOCUMENTS_IDS_KEY, GEO_RTREE_KEY}; use crate::update::new::KvReaderFieldId; use crate::vector::Embedding; -use crate::{CboRoaringBitmapCodec, DocumentId, Index, InternalError}; +use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError}; /// Creates a tuple of senders/receiver to be used by /// the extractors and the writer loop. @@ -454,7 +454,7 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = reserve_grant(&mut producer, total_length, &self.sender); + let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?; payload_header.serialize_into(&mut grant); // We only send a wake up message when the channel is empty @@ -500,7 +500,7 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = reserve_grant(&mut producer, total_length, &self.sender); + let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?; let header_size = payload_header.header_size(); let (header_bytes, remaining) = grant.split_at_mut(header_size); @@ -575,7 +575,7 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = reserve_grant(&mut producer, total_length, &self.sender); + let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?; let header_size = payload_header.header_size(); let (header_bytes, remaining) = grant.split_at_mut(header_size); @@ -629,7 +629,7 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = reserve_grant(&mut producer, total_length, &self.sender); + let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?; let header_size = payload_header.header_size(); let (header_bytes, remaining) = grant.split_at_mut(header_size); @@ -652,21 +652,21 @@ fn reserve_grant<'b>( producer: &mut FrameProducer<'b>, total_length: usize, sender: &flume::Sender, -) -> FrameGrantW<'b> { +) -> crate::Result> { loop { for _ in 0..10_000 { match producer.grant(total_length) { Ok(mut grant) => { // We could commit only the used memory. 
grant.to_commit(total_length); - return grant; + return Ok(grant); } Err(bbqueue::Error::InsufficientSize) => continue, Err(e) => unreachable!("{e:?}"), } } if sender.is_disconnected() { - panic!("channel is disconnected"); + return Err(Error::InternalError(InternalError::AbortedIndexation)); } } } From a439fa3e1adab396074bd6387f16b081c50499ef Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 2 Dec 2024 12:02:16 +0100 Subject: [PATCH 061/158] While spamming the batches route we could see a processing batch becoming missing and then finished, this commit ensures the batches goes from processing to finished directly --- crates/index-scheduler/src/lib.rs | 9 +++++---- crates/meilisearch/tests/batches/mod.rs | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index cef24c1ea..f2510f1f9 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -1738,11 +1738,8 @@ impl IndexScheduler { } } - self.processing_tasks.write().unwrap().stop_processing(); // We must re-add the canceled task so they're part of the same batch. - // processed.processing |= canceled; ids |= canceled; - self.write_batch(&mut wtxn, processing_batch, &ids)?; #[cfg(test)] @@ -1750,8 +1747,12 @@ impl IndexScheduler { wtxn.commit().map_err(Error::HeedTransaction)?; + // We should stop processing AFTER everything is processed and written to disk otherwise, a batch (which only lives in RAM) may appear in the processing task + // and then become « not found » for some time until the commit everything is written and the final commit is made. + self.processing_tasks.write().unwrap().stop_processing(); + // Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart - tracing::debug!("Deleting the update files"); + // tracing::debug!("Deleting the update files"); //We take one read transaction **per thread**. 
Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap let idx = AtomicU32::new(0); diff --git a/crates/meilisearch/tests/batches/mod.rs b/crates/meilisearch/tests/batches/mod.rs index 799aa3df7..9c869c140 100644 --- a/crates/meilisearch/tests/batches/mod.rs +++ b/crates/meilisearch/tests/batches/mod.rs @@ -224,7 +224,7 @@ async fn list_batches_status_and_type_filtered() { } #[actix_rt::test] -async fn get_batch_filter_error() { +async fn list_batch_filter_error() { let server = Server::new().await; let (response, code) = server.batches_filter("lol=pied").await; From d78f4666a0ec5f645317ea07a07c324a399bd8ca Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Dec 2024 12:25:01 +0100 Subject: [PATCH 062/158] Fix autobatching of documents and settings --- crates/index-scheduler/src/autobatcher.rs | 82 ++--------------------- crates/index-scheduler/src/batch.rs | 61 ----------------- crates/index-scheduler/src/utils.rs | 2 +- 3 files changed, 5 insertions(+), 140 deletions(-) diff --git a/crates/index-scheduler/src/autobatcher.rs b/crates/index-scheduler/src/autobatcher.rs index 0f6aa8a3a..7ce5717f5 100644 --- a/crates/index-scheduler/src/autobatcher.rs +++ b/crates/index-scheduler/src/autobatcher.rs @@ -115,13 +115,6 @@ pub enum BatchKind { allow_index_creation: bool, settings_ids: Vec, }, - SettingsAndDocumentOperation { - settings_ids: Vec, - method: IndexDocumentsMethod, - allow_index_creation: bool, - primary_key: Option, - operation_ids: Vec, - }, Settings { allow_index_creation: bool, settings_ids: Vec, @@ -146,7 +139,6 @@ impl BatchKind { match self { BatchKind::DocumentOperation { allow_index_creation, .. } | BatchKind::ClearAndSettings { allow_index_creation, .. } - | BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. } | BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation), _ => None, } @@ -154,10 +146,7 @@ impl BatchKind { fn primary_key(&self) -> Option> { match self { - BatchKind::DocumentOperation { primary_key, .. } - | BatchKind::SettingsAndDocumentOperation { primary_key, .. } => { - Some(primary_key.as_deref()) - } + BatchKind::DocumentOperation { primary_key, .. } => Some(primary_key.as_deref()), _ => None, } } @@ -275,8 +264,7 @@ impl BatchKind { Break(BatchKind::IndexDeletion { ids }) } ( - BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other } - | BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other }, + BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }, K::IndexDeletion, ) => { ids.push(id); @@ -356,15 +344,9 @@ impl BatchKind { ) => Break(this), ( - BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids }, + this @ BatchKind::DocumentOperation { .. }, K::Settings { .. 
}, - ) => Continue(BatchKind::SettingsAndDocumentOperation { - settings_ids: vec![id], - method, - allow_index_creation, - primary_key, - operation_ids, - }), + ) => Break(this), (BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: _ }, K::DocumentClear) => { deletion_ids.push(id); @@ -477,63 +459,7 @@ impl BatchKind { allow_index_creation, }) } - ( - BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ }, - K::DocumentClear, - ) => { - operation_ids.push(id); - Continue(BatchKind::ClearAndSettings { - settings_ids, - other: operation_ids, - allow_index_creation, - }) - } - ( - BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _}, - K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. }, - ) => { - operation_ids.push(id); - Continue(BatchKind::SettingsAndDocumentOperation { - settings_ids, - method: ReplaceDocuments, - allow_index_creation, - primary_key: pk2, - operation_ids, - }) - } - ( - BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids }, - K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. }, - ) => { - operation_ids.push(id); - Continue(BatchKind::SettingsAndDocumentOperation { - settings_ids, - method: UpdateDocuments, - allow_index_creation, - primary_key: pk2, - operation_ids, - }) - } - // But we can't batch a settings and a doc op with another doc op - // this MUST be AFTER the two previous branch - ( - this @ BatchKind::SettingsAndDocumentOperation { .. }, - K::DocumentDeletion { .. } | K::DocumentImport { .. }, - ) => Break(this), - ( - BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids }, - K::Settings { .. }, - ) => { - settings_ids.push(id); - Continue(BatchKind::SettingsAndDocumentOperation { - settings_ids, - method, - allow_index_creation, - primary_key, - operation_ids, - }) - } ( BatchKind::IndexCreation { .. } | BatchKind::IndexDeletion { .. } diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index 04cdb912f..5a1ed3aa7 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -441,67 +441,6 @@ impl IndexScheduler { must_create_index, })) } - BatchKind::SettingsAndDocumentOperation { - settings_ids, - method, - allow_index_creation, - primary_key, - operation_ids, - } => { - let settings = self.create_next_batch_index( - rtxn, - index_uid.clone(), - BatchKind::Settings { settings_ids, allow_index_creation }, - current_batch, - must_create_index, - )?; - - let document_import = self.create_next_batch_index( - rtxn, - index_uid.clone(), - BatchKind::DocumentOperation { - method, - allow_index_creation, - primary_key, - operation_ids, - }, - current_batch, - must_create_index, - )?; - - match (document_import, settings) { - ( - Some(Batch::IndexOperation { - op: - IndexOperation::DocumentOperation { - primary_key, - documents_counts, - operations, - tasks: document_import_tasks, - .. - }, - .. - }), - Some(Batch::IndexOperation { - op: IndexOperation::Settings { settings, tasks: settings_tasks, .. }, - .. 
- }), - ) => Ok(Some(Batch::IndexOperation { - op: IndexOperation::SettingsAndDocumentOperation { - index_uid, - primary_key, - method, - documents_counts, - operations, - document_import_tasks, - settings, - settings_tasks, - }, - must_create_index, - })), - _ => unreachable!(), - } - } BatchKind::IndexCreation { id } => { let mut task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?; current_batch.processing(Some(&mut task)); diff --git a/crates/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs index 1ca782f8c..fc41d535c 100644 --- a/crates/index-scheduler/src/utils.rs +++ b/crates/index-scheduler/src/utils.rs @@ -106,7 +106,7 @@ impl ProcessingBatch { self.stats.total_nb_tasks = 0; } - /// Update the timestamp of the tasks and the inner structure of this sturcture. + /// Update the timestamp of the tasks and the inner structure of this structure. pub fn update(&mut self, task: &mut Task) { // We must re-set this value in case we're dealing with a task that has been added between // the `processing` and `finished` state From 6a1d26a60c867f1a239ffc52dc91846fc28a8b88 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Dec 2024 14:15:15 +0100 Subject: [PATCH 063/158] Update autobatching tests --- crates/index-scheduler/src/autobatcher.rs | 90 ++++++----------------- 1 file changed, 23 insertions(+), 67 deletions(-) diff --git a/crates/index-scheduler/src/autobatcher.rs b/crates/index-scheduler/src/autobatcher.rs index 7ce5717f5..5950e2b13 100644 --- a/crates/index-scheduler/src/autobatcher.rs +++ b/crates/index-scheduler/src/autobatcher.rs @@ -734,30 +734,30 @@ mod tests { } #[test] - fn document_addition_batch_with_settings() { + fn document_addition_doesnt_batch_with_settings() { // simple case - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); // multiple settings and doc addition - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), 
doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); // addition and setting unordered - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - // We ensure this kind of batch doesn't batch with forbidden operations - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, 
true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + // Doesn't batch with other forbidden operations + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), 
settings(true), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); } #[test] @@ -785,8 +785,8 @@ mod tests { debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((ClearAndSettings { other: [0, 2], allow_index_creation: true, settings_ids: [1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); } #[test] @@ -833,50 +833,6 @@ mod tests { debug_snapshot!(autobatch_from(false,None, [doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(false,None, [settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, true))"); debug_snapshot!(autobatch_from(false,None, [settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 1] }, false))"); - - // Then the mixed cases. - // The index already exists, whatever is the right of the tasks it shouldn't change the result. 
- debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); - - // When the index doesn't exists yet it's more complicated. - // Either the first task we encounter create it, in which case we can create a big batch with everything. 
- debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); - // The right of the tasks following isn't really important. - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, true))"); - // Or, the second case; the first task doesn't create the index and thus we wants to batch it with only tasks that can't create an index. - // that can be a second task that don't have the right to create an index. Or anything that can't create an index like an index deletion, document deletion, document clear, etc. - // All theses tasks are going to throw an error `Index doesn't exist` once the batch is processed. - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), idx_del()]), @"Some((IndexDeletion { ids: [0, 2, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))"); - // The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whit what - // follows because we first need to process the erronous batch. 
- debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); } #[test] @@ -885,13 +841,13 @@ mod tests { debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: 
ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); // batch deletion and addition From 057143214d9d846b95a96999025d7ace377f39f3 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Dec 2024 14:29:52 +0100 Subject: [PATCH 064/158] Fix warnings --- crates/index-scheduler/src/batch.rs | 75 ++--------------------------- 1 file changed, 3 insertions(+), 72 deletions(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index 5a1ed3aa7..8e35ec6ac 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -104,7 +104,6 @@ pub(crate) enum IndexOperation { index_uid: String, primary_key: Option, method: IndexDocumentsMethod, - documents_counts: Vec, operations: Vec, tasks: Vec, }, @@ -130,19 +129,6 @@ pub(crate) enum IndexOperation { index_uid: String, cleared_tasks: Vec, - // The boolean indicates if it's a settings deletion or creation. - settings: Vec<(bool, Settings)>, - settings_tasks: Vec, - }, - SettingsAndDocumentOperation { - index_uid: String, - - primary_key: Option, - method: IndexDocumentsMethod, - documents_counts: Vec, - operations: Vec, - document_import_tasks: Vec, - // The boolean indicates if it's a settings deletion or creation. settings: Vec<(bool, Settings)>, settings_tasks: Vec, @@ -174,12 +160,7 @@ impl Batch { IndexOperation::DocumentEdition { task, .. } => { RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap() } - IndexOperation::SettingsAndDocumentOperation { - document_import_tasks: tasks, - settings_tasks: other, - .. - } - | IndexOperation::DocumentClearAndSetting { + IndexOperation::DocumentClearAndSetting { cleared_tasks: tasks, settings_tasks: other, .. @@ -239,8 +220,7 @@ impl IndexOperation { | IndexOperation::DocumentDeletion { index_uid, .. } | IndexOperation::DocumentClear { index_uid, .. } | IndexOperation::Settings { index_uid, .. } - | IndexOperation::DocumentClearAndSetting { index_uid, .. } - | IndexOperation::SettingsAndDocumentOperation { index_uid, .. } => index_uid, + | IndexOperation::DocumentClearAndSetting { index_uid, .. } => index_uid, } } } @@ -262,9 +242,6 @@ impl fmt::Display for IndexOperation { IndexOperation::DocumentClearAndSetting { .. } => { f.write_str("IndexOperation::DocumentClearAndSetting") } - IndexOperation::SettingsAndDocumentOperation { .. } => { - f.write_str("IndexOperation::SettingsAndDocumentOperation") - } } } } @@ -330,21 +307,14 @@ impl IndexScheduler { }) .flatten(); - let mut documents_counts = Vec::new(); let mut operations = Vec::new(); for task in tasks.iter() { match task.kind { - KindWithContent::DocumentAdditionOrUpdate { - content_file, - documents_count, - .. 
- } => { - documents_counts.push(documents_count); + KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => { operations.push(DocumentOperation::Add(content_file)); } KindWithContent::DocumentDeletion { ref documents_ids, .. } => { - documents_counts.push(documents_ids.len() as u64); operations.push(DocumentOperation::Delete(documents_ids.clone())); } _ => unreachable!(), @@ -356,7 +326,6 @@ impl IndexScheduler { index_uid, primary_key, method, - documents_counts, operations, tasks, }, @@ -1243,7 +1212,6 @@ impl IndexScheduler { index_uid: _, primary_key, method, - documents_counts: _, operations, mut tasks, } => { @@ -1633,43 +1601,6 @@ impl IndexScheduler { Ok(tasks) } - IndexOperation::SettingsAndDocumentOperation { - index_uid, - primary_key, - method, - documents_counts, - operations, - document_import_tasks, - settings, - settings_tasks, - } => { - let settings_tasks = self.apply_index_operation( - index_wtxn, - index, - IndexOperation::Settings { - index_uid: index_uid.clone(), - settings, - tasks: settings_tasks, - }, - )?; - - let mut import_tasks = self.apply_index_operation( - index_wtxn, - index, - IndexOperation::DocumentOperation { - index_uid, - primary_key, - method, - documents_counts, - operations, - tasks: document_import_tasks, - }, - )?; - - let mut tasks = settings_tasks; - tasks.append(&mut import_tasks); - Ok(tasks) - } IndexOperation::DocumentClearAndSetting { index_uid, cleared_tasks, From beeb31ce41e31074e1c85367684e0d78d8d008c1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 2 Dec 2024 15:32:16 +0100 Subject: [PATCH 065/158] Update crates/index-scheduler/src/lib.rs --- crates/index-scheduler/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index f2510f1f9..c719bb35e 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -1752,7 +1752,7 @@ impl IndexScheduler { self.processing_tasks.write().unwrap().stop_processing(); // Once the tasks are committed, we should delete all the update files associated ASAP to avoid leaking files in case of a restart - // tracing::debug!("Deleting the update files"); + tracing::debug!("Deleting the update files"); //We take one read transaction **per thread**. Then, every thread is going to pull out new IDs from the roaring bitmap with the help of an atomic shared index into the bitmap let idx = AtomicU32::new(0); From d040aff10124c72b4785f295163189943704fb4d Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Dec 2024 16:30:14 +0100 Subject: [PATCH 066/158] Stop allocating 1GiB for documents --- crates/meilisearch-types/src/document_formats.rs | 2 +- crates/milli/src/update/new/indexer/document_changes.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/meilisearch-types/src/document_formats.rs b/crates/meilisearch-types/src/document_formats.rs index 096349448..008be4022 100644 --- a/crates/meilisearch-types/src/document_formats.rs +++ b/crates/meilisearch-types/src/document_formats.rs @@ -214,7 +214,7 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result { // We memory map to be able to deserialize into a RawMap that // does not allocate when possible and only materialize the first/top level. let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? 
}; - let mut doc_alloc = Bump::with_capacity(1024 * 1024 * 1024); // 1MiB + let mut doc_alloc = Bump::with_capacity(1024 * 1024); // 1MiB let mut out = BufWriter::new(output); let mut deserializer = serde_json::Deserializer::from_slice(&input); diff --git a/crates/milli/src/update/new/indexer/document_changes.rs b/crates/milli/src/update/new/indexer/document_changes.rs index bfb369680..2a5c25525 100644 --- a/crates/milli/src/update/new/indexer/document_changes.rs +++ b/crates/milli/src/update/new/indexer/document_changes.rs @@ -70,7 +70,7 @@ impl< F: FnOnce(&'extractor Bump) -> Result, { let doc_alloc = - doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024 * 1024)))); + doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024)))); let doc_alloc = doc_alloc.0.take(); let fields_ids_map = fields_ids_map_store .get_or(|| RefCell::new(GlobalFieldsIdsMap::new(new_fields_ids_map)).into()); From e905a72d731f0e6dc581d9b7ad02b94e594aa94e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 2 Dec 2024 18:13:56 +0100 Subject: [PATCH 067/158] remove mimalloc on Windows --- crates/benchmarks/benches/indexing.rs | 1 + crates/benchmarks/benches/search_geo.rs | 1 + crates/benchmarks/benches/search_songs.rs | 1 + crates/benchmarks/benches/search_wiki.rs | 1 + crates/meilisearch/src/main.rs | 4 ++-- crates/milli/src/lib.rs | 1 + 6 files changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index d3f307be3..870e56686 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -16,6 +16,7 @@ use rand::seq::SliceRandom; use rand_chacha::rand_core::SeedableRng; use roaring::RoaringBitmap; +#[cfg(not(windows))] #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; diff --git a/crates/benchmarks/benches/search_geo.rs b/crates/benchmarks/benches/search_geo.rs index faea4e3e0..72503ce57 100644 --- a/crates/benchmarks/benches/search_geo.rs +++ b/crates/benchmarks/benches/search_geo.rs @@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main}; use milli::update::Settings; use utils::Conf; +#[cfg(not(windows))] #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; diff --git a/crates/benchmarks/benches/search_songs.rs b/crates/benchmarks/benches/search_songs.rs index a1245528f..bef014a0e 100644 --- a/crates/benchmarks/benches/search_songs.rs +++ b/crates/benchmarks/benches/search_songs.rs @@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main}; use milli::update::Settings; use utils::Conf; +#[cfg(not(windows))] #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; diff --git a/crates/benchmarks/benches/search_wiki.rs b/crates/benchmarks/benches/search_wiki.rs index b792c2645..24eb5c8d1 100644 --- a/crates/benchmarks/benches/search_wiki.rs +++ b/crates/benchmarks/benches/search_wiki.rs @@ -5,6 +5,7 @@ use criterion::{criterion_group, criterion_main}; use milli::update::Settings; use utils::Conf; +#[cfg(not(windows))] #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; diff --git a/crates/meilisearch/src/main.rs b/crates/meilisearch/src/main.rs index c0652bf1e..b4b46bec4 100644 --- a/crates/meilisearch/src/main.rs +++ b/crates/meilisearch/src/main.rs @@ -20,14 +20,14 @@ use meilisearch::{ LogStderrType, Opt, SubscriberForSecondLayer, }; use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE}; -use mimalloc::MiMalloc; use termcolor::{Color, 
ColorChoice, ColorSpec, StandardStream, WriteColor}; use tracing::level_filters::LevelFilter; use tracing_subscriber::layer::SubscriberExt as _; use tracing_subscriber::Layer; +#[cfg(not(windows))] #[global_allocator] -static ALLOC: MiMalloc = MiMalloc; +static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; fn default_log_route_layer() -> LogRouteType { None.with_filter(tracing_subscriber::filter::Targets::new().with_target("", LevelFilter::OFF)) diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index 48b03b6cc..1fc876f79 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -1,6 +1,7 @@ #![cfg_attr(all(test, fuzzing), feature(no_coverage))] #![allow(clippy::type_complexity)] +#[cfg(not(windows))] #[cfg(test)] #[global_allocator] pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; From 71d53f413fe06273068ef17313e3f88cf7b95d81 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 3 Dec 2024 11:07:03 +0100 Subject: [PATCH 068/158] increase the margin allowed to delete task --- crates/index-scheduler/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index d6de9c758..e071c4cc0 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -1440,7 +1440,7 @@ impl IndexScheduler { // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty()) - && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50 + && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 40 { return Err(Error::NoSpaceLeftInTaskQueue); } From 0ad2f57a9215f1028778d8e3668c3cb7f32709f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 3 Dec 2024 11:35:45 +0100 Subject: [PATCH 069/158] Update bbqueue repo to point to the meilisearch org --- Cargo.lock | 2 +- crates/milli/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 038b269ce..3c2fb711e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -492,7 +492,7 @@ checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] name = "bbqueue" version = "0.5.1" -source = "git+https://github.com/kerollmops/bbqueue#cbb87cc707b5af415ef203bdaf2443e06ba0d6d4" +source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2443e06ba0d6d4" [[package]] name = "benchmarks" diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index a88401470..2a959b654 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -98,7 +98,7 @@ allocator-api2 = "0.2.18" rustc-hash = "2.0.0" uell = "0.1.0" enum-iterator = "2.1.0" -bbqueue = { git = "https://github.com/kerollmops/bbqueue" } +bbqueue = { git = "https://github.com/meilisearch/bbqueue" } flume = { version = "0.11.1", default-features = false } [dev-dependencies] From 8ecb726683bca6a2e2c837db8c187ddbe39554f6 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 3 Dec 2024 15:49:11 +0100 Subject: [PATCH 070/158] Fix the minimun BBQueue channel threshold --- crates/milli/src/update/new/indexer/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 7262c65cb..383823de1 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ 
b/crates/milli/src/update/new/indexer/mod.rs @@ -86,9 +86,9 @@ where (grenad_parameters, 2 * minimum_capacity), // 100 MiB by thread by default |max_memory| { // 2% of the indexing memory - let total_bbbuffer_capacity = (max_memory / 100 / 2).min(minimum_capacity); + let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity); let new_grenad_parameters = GrenadParameters { - max_memory: Some(max_memory - total_bbbuffer_capacity), + max_memory: Some(max_memory.saturating_sub(total_bbbuffer_capacity)), ..grenad_parameters }; (new_grenad_parameters, total_bbbuffer_capacity) From 0459b1a2420d40282a0a259f02f0aedd57db6514 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 Dec 2024 10:32:25 +0100 Subject: [PATCH 071/158] Change the reserve and grant function to accept a closure --- crates/milli/src/update/new/channel.rs | 71 +++++++++++++++----------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index b749eb7fe..5675069d6 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -7,7 +7,7 @@ use std::num::NonZeroU16; use std::ops::Range; use std::time::Duration; -use bbqueue::framed::{FrameGrantR, FrameGrantW, FrameProducer}; +use bbqueue::framed::{FrameGrantR, FrameProducer}; use bbqueue::BBBuffer; use bytemuck::{checked, CheckedBitPattern, NoUninit}; use flume::{RecvTimeoutError, SendError}; @@ -454,8 +454,10 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?; - payload_header.serialize_into(&mut grant); + reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { + payload_header.serialize_into(grant); + Ok(()) + })?; // We only send a wake up message when the channel is empty // so that we don't fill the channel with too many WakeUps. @@ -500,18 +502,20 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?; + reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { + let header_size = payload_header.header_size(); + let (header_bytes, remaining) = grant.split_at_mut(header_size); + payload_header.serialize_into(header_bytes); - let header_size = payload_header.header_size(); - let (header_bytes, remaining) = grant.split_at_mut(header_size); - payload_header.serialize_into(header_bytes); - - if dimensions != 0 { - let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::()); - for (embedding, output) in embeddings.iter().zip(output_iter) { - output.copy_from_slice(bytemuck::cast_slice(embedding)); + if dimensions != 0 { + let output_iter = remaining.chunks_exact_mut(dimensions * mem::size_of::()); + for (embedding, output) in embeddings.iter().zip(output_iter) { + output.copy_from_slice(bytemuck::cast_slice(embedding)); + } } - } + + Ok(()) + })?; // We only send a wake up message when the channel is empty // so that we don't fill the channel with too many WakeUps. @@ -575,13 +579,13 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. 
- let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?; - - let header_size = payload_header.header_size(); - let (header_bytes, remaining) = grant.split_at_mut(header_size); - payload_header.serialize_into(header_bytes); - let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize); - key_value_writer(key_buffer, value_buffer)?; + reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { + let header_size = payload_header.header_size(); + let (header_bytes, remaining) = grant.split_at_mut(header_size); + payload_header.serialize_into(header_bytes); + let (key_buffer, value_buffer) = remaining.split_at_mut(key_length.get() as usize); + key_value_writer(key_buffer, value_buffer) + })?; // We only send a wake up message when the channel is empty // so that we don't fill the channel with too many WakeUps. @@ -629,12 +633,12 @@ impl<'b> ExtractorBbqueueSender<'b> { } // Spin loop to have a frame the size we requested. - let mut grant = reserve_grant(&mut producer, total_length, &self.sender)?; - - let header_size = payload_header.header_size(); - let (header_bytes, remaining) = grant.split_at_mut(header_size); - payload_header.serialize_into(header_bytes); - key_writer(remaining)?; + reserve_and_write_grant(&mut producer, total_length, &self.sender, |grant| { + let header_size = payload_header.header_size(); + let (header_bytes, remaining) = grant.split_at_mut(header_size); + payload_header.serialize_into(header_bytes); + key_writer(remaining) + })?; // We only send a wake up message when the channel is empty // so that we don't fill the channel with too many WakeUps. @@ -648,18 +652,23 @@ impl<'b> ExtractorBbqueueSender<'b> { /// Try to reserve a frame grant of `total_length` by spin looping /// on the BBQueue buffer and panics if the receiver has been disconnected. -fn reserve_grant<'b>( - producer: &mut FrameProducer<'b>, +fn reserve_and_write_grant( + producer: &mut FrameProducer, total_length: usize, sender: &flume::Sender, -) -> crate::Result> { + f: F, +) -> crate::Result<()> +where + F: FnOnce(&mut [u8]) -> crate::Result<()>, +{ loop { for _ in 0..10_000 { match producer.grant(total_length) { Ok(mut grant) => { // We could commit only the used memory. - grant.to_commit(total_length); - return Ok(grant); + f(&mut grant)?; + grant.commit(total_length); + return Ok(()); } Err(bbqueue::Error::InsufficientSize) => continue, Err(e) => unreachable!("{e:?}"), From 96831ed9bb9b2784a294f32f4665f16135347f27 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 Dec 2024 11:03:01 +0100 Subject: [PATCH 072/158] Send the WakeUp message if necessary in the reserve function --- crates/milli/src/update/new/channel.rs | 36 +++++++------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index 5675069d6..ebd0ba429 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -459,12 +459,6 @@ impl<'b> ExtractorBbqueueSender<'b> { Ok(()) })?; - // We only send a wake up message when the channel is empty - // so that we don't fill the channel with too many WakeUps. - if self.sender.is_empty() { - self.sender.send(ReceiverAction::WakeUp).unwrap(); - } - Ok(()) } @@ -517,12 +511,6 @@ impl<'b> ExtractorBbqueueSender<'b> { Ok(()) })?; - // We only send a wake up message when the channel is empty - // so that we don't fill the channel with too many WakeUps. 
- if self.sender.is_empty() { - self.sender.send(ReceiverAction::WakeUp).unwrap(); - } - Ok(()) } @@ -587,12 +575,6 @@ impl<'b> ExtractorBbqueueSender<'b> { key_value_writer(key_buffer, value_buffer) })?; - // We only send a wake up message when the channel is empty - // so that we don't fill the channel with too many WakeUps. - if self.sender.is_empty() { - self.sender.send(ReceiverAction::WakeUp).unwrap(); - } - Ok(()) } @@ -640,18 +622,13 @@ impl<'b> ExtractorBbqueueSender<'b> { key_writer(remaining) })?; - // We only send a wake up message when the channel is empty - // so that we don't fill the channel with too many WakeUps. - if self.sender.is_empty() { - self.sender.send(ReceiverAction::WakeUp).unwrap(); - } - Ok(()) } } -/// Try to reserve a frame grant of `total_length` by spin looping -/// on the BBQueue buffer and panics if the receiver has been disconnected. +/// Try to reserve a frame grant of `total_length` by spin +/// looping on the BBQueue buffer, panics if the receiver +/// has been disconnected or send a WakeUp message if necessary. fn reserve_and_write_grant( producer: &mut FrameProducer, total_length: usize, @@ -668,6 +645,13 @@ where // We could commit only the used memory. f(&mut grant)?; grant.commit(total_length); + + // We only send a wake up message when the channel is empty + // so that we don't fill the channel with too many WakeUps. + if sender.is_empty() { + sender.send(ReceiverAction::WakeUp).unwrap(); + } + return Ok(()); } Err(bbqueue::Error::InsufficientSize) => continue, From 953a82ca04f64a6b3db1c421fc7ab778038357ea Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 4 Dec 2024 11:15:29 +0100 Subject: [PATCH 073/158] Add new error message --- crates/meilisearch-types/src/error.rs | 1 + crates/meilisearch/src/search/mod.rs | 7 ++++++ .../meilisearch/tests/search/facet_search.rs | 22 +++++++++++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index 4b930bf8d..c68059682 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -279,6 +279,7 @@ InvalidSearchPage , InvalidRequest , BAD_REQUEST ; InvalidSearchQ , InvalidRequest , BAD_REQUEST ; InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ; InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ; +InvalidFacetSearchDisabled , InvalidRequest , BAD_REQUEST ; InvalidSearchVector , InvalidRequest , BAD_REQUEST ; InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ; diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 7e185e951..9e0c936b7 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -1407,6 +1407,13 @@ pub fn perform_facet_search( None => TimeBudget::default(), }; + if !index.facet_search(&rtxn)? { + return Err(ResponseError::from_msg( + "The facet search is disabled for this index".to_string(), + Code::InvalidFacetSearchDisabled, + )); + } + // In the faceted search context, we want to use the intersection between the locales provided by the user // and the locales of the facet string. // If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale. 
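The hunk above is the whole behavioural change of this patch: `perform_facet_search` now rejects the request up front when facet search is disabled for the index, instead of running the search and returning an empty hit list (the behaviour the tests below previously asserted). What follows is a minimal, self-contained sketch of that early-return guard; `IndexSettings` and `SearchError` are illustrative stand-ins for this sketch only, not Meilisearch's real `Index`/`ResponseError` types.

// Illustrative stand-ins, not Meilisearch types: only the shape of the guard matters here.
#[derive(Debug, PartialEq)]
enum SearchError {
    FacetSearchDisabled,
}

struct IndexSettings {
    facet_search: bool,
}

fn perform_facet_search(
    settings: &IndexSettings,
    facet_query: &str,
) -> Result<Vec<String>, SearchError> {
    // Same pattern as the hunk above: fail with a typed error before doing
    // any facet lookup work when the feature is switched off.
    if !settings.facet_search {
        return Err(SearchError::FacetSearchDisabled);
    }
    // Placeholder for the real facet lookup.
    Ok(vec![format!("facet hits matching {facet_query:?}")])
}

fn main() {
    let settings = IndexSettings { facet_search: false };
    // This is the case the updated tests below cover: the caller gets an
    // explicit error (surfaced as a 400 Bad Request by the HTTP layer)
    // rather than an empty result set.
    assert_eq!(
        perform_facet_search(&settings, "a"),
        Err(SearchError::FacetSearchDisabled)
    );
}

Returning an explicit error instead of an empty result makes the misconfiguration visible to SDKs and API consumers, which is what the reworked snapshots in the test file below check for.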
diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 8fbeae293..418cb4da4 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -221,8 +221,15 @@ async fn add_documents_and_deactivate_facet_search() { let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; - assert_eq!(code, 200, "{}", response); - assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0); + assert_eq!(code, 400, "{}", response); + snapshot!(response, @r###" + { + "message": "Facet search is disabled for this index", + "code": "invalid_search_disabled_facet_search", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_disabled_facet_search" + } + "###); } #[actix_rt::test] @@ -245,8 +252,15 @@ async fn deactivate_facet_search_and_add_documents() { let (response, code) = index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; - assert_eq!(code, 200, "{}", response); - assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 0); + assert_eq!(code, 400, "{}", response); + snapshot!(response, @r###" + { + "message": "Facet search is disabled for this index", + "code": "invalid_search_disabled_facet_search", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_disabled_facet_search" + } + "###); } #[actix_rt::test] From 5ce9acb0b9eb8878b6514f38ef8641867e4f3e01 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 4 Dec 2024 12:19:19 +0100 Subject: [PATCH 074/158] Add workloads --- workloads/hackernews-add-new-documents.json | 106 +++++++++++++++ .../hackernews-modify-facet-numbers.json | 111 ++++++++++++++++ .../hackernews-modify-facet-strings.json | 111 ++++++++++++++++ workloads/hackernews-modify-searchables.json | 124 ++++++++++++++++++ 4 files changed, 452 insertions(+) create mode 100644 workloads/hackernews-add-new-documents.json create mode 100644 workloads/hackernews-modify-facet-numbers.json create mode 100644 workloads/hackernews-modify-facet-strings.json create mode 100644 workloads/hackernews-modify-searchables.json diff --git a/workloads/hackernews-add-new-documents.json b/workloads/hackernews-add-new-documents.json new file mode 100644 index 000000000..38e7747c0 --- /dev/null +++ b/workloads/hackernews-add-new-documents.json @@ -0,0 +1,106 @@ +{ + "name": "hackernews.add_new_documents", + "run_count": 3, + "extra_cli_args": [], + "assets": { + "hackernews-01.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", + "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" + }, + "hackernews-02.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", + "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" + }, + "hackernews-03.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", + "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" + }, + "hackernews-04.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", + "sha256": 
"ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" + }, + "hackernews-05.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", + "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time", + "text" + ], + "searchableAttributes": [ + "title", + "text" + ], + "filterableAttributes": [ + "by", + "kids", + "parent" + ], + "sortableAttributes": [ + "score", + "time" + ] + } + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-02.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-03.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-04.ndjson" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-05.ndjson" + }, + "synchronous": "WaitForTask" + } + ] + } + \ No newline at end of file diff --git a/workloads/hackernews-modify-facet-numbers.json b/workloads/hackernews-modify-facet-numbers.json new file mode 100644 index 000000000..84d94969b --- /dev/null +++ b/workloads/hackernews-modify-facet-numbers.json @@ -0,0 +1,111 @@ +{ + "name": "hackernews.modify_facet_numbers", + "run_count": 3, + "extra_cli_args": [], + "assets": { + "hackernews-01.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", + "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" + }, + "hackernews-02.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", + "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" + }, + "hackernews-03.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", + "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" + }, + "hackernews-04.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", + "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" + }, + "hackernews-05.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", + "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" + }, + "hackernews-02-modified-filters.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson", + "sha256": "1fcb6f89ddeff51c3fe7b86b3574f894ff9859a76cf056ab7e7dacc72970dabb" + } + }, + "precommands": [ + { + 
"route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time", + "text" + ], + "searchableAttributes": [ + "title", + "text" + ], + "filterableAttributes": [ + "by", + "kids", + "parent" + ], + "sortableAttributes": [ + "score", + "time" + ] + } + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-02.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-03.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-04.ndjson" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01-modified-filters.ndjson" + }, + "synchronous": "WaitForTask" + } + ] + } + \ No newline at end of file diff --git a/workloads/hackernews-modify-facet-strings.json b/workloads/hackernews-modify-facet-strings.json new file mode 100644 index 000000000..f912558e8 --- /dev/null +++ b/workloads/hackernews-modify-facet-strings.json @@ -0,0 +1,111 @@ +{ + "name": "hackernews.modify_facet_strings", + "run_count": 3, + "extra_cli_args": [], + "assets": { + "hackernews-01.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", + "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" + }, + "hackernews-02.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", + "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" + }, + "hackernews-03.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", + "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" + }, + "hackernews-04.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", + "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" + }, + "hackernews-05.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", + "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" + }, + "hackernews-01-modified-filters.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson", + "sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time", + "text" + ], + "searchableAttributes": [ + "title", + "text" + ], + "filterableAttributes": [ + "by", + "kids", + "parent" + ], + "sortableAttributes": [ + "score", + "time" + ] + } + }, + 
"synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-02.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-03.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-04.ndjson" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01-modified-filters.ndjson" + }, + "synchronous": "WaitForTask" + } + ] + } + \ No newline at end of file diff --git a/workloads/hackernews-modify-searchables.json b/workloads/hackernews-modify-searchables.json new file mode 100644 index 000000000..0f674ece0 --- /dev/null +++ b/workloads/hackernews-modify-searchables.json @@ -0,0 +1,124 @@ +{ + "name": "hackernews.modify_searchables", + "run_count": 3, + "extra_cli_args": [], + "assets": { + "hackernews-01.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", + "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" + }, + "hackernews-02.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", + "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" + }, + "hackernews-03.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", + "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" + }, + "hackernews-04.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", + "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" + }, + "hackernews-05.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", + "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" + }, + "hackernews-01-modified-searchables.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-searchables.ndjson", + "sha256": "e5c08710c6af70031ac7212e0ba242c72ef29c8d4e1fce66c789544641452a7c" + }, + "hackernews-02-modified-searchables.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-searchables.ndjson", + "sha256": "098b029851117087b1e26ccb7ac408eda9bba54c3008213a2880d6fab607346e" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time", + "text" + ], + "searchableAttributes": [ + "title", + "text" + ], + "filterableAttributes": [ + "by", + "kids", + "parent" + ], + "sortableAttributes": [ + "score", + "time" + ] + } + }, + "synchronous": "WaitForTask" + }, + { + 
"route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-02.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-03.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-04.ndjson" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01-modified-searchables.ndjson" + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-02-modified-searchables.ndjson" + }, + "synchronous": "WaitForTask" + } + ] + } + \ No newline at end of file From 1a17e2e5727b9f98685176f6b14984655c245c9f Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 4 Dec 2024 13:57:06 +0100 Subject: [PATCH 075/158] fix formating --- workloads/hackernews-add-new-documents.json | 189 ++++++++------- .../hackernews-modify-facet-numbers.json | 200 ++++++++-------- .../hackernews-modify-facet-strings.json | 202 ++++++++-------- workloads/hackernews-modify-searchables.json | 219 +++++++++--------- 4 files changed, 404 insertions(+), 406 deletions(-) diff --git a/workloads/hackernews-add-new-documents.json b/workloads/hackernews-add-new-documents.json index 38e7747c0..0470a0792 100644 --- a/workloads/hackernews-add-new-documents.json +++ b/workloads/hackernews-add-new-documents.json @@ -1,106 +1,105 @@ { - "name": "hackernews.add_new_documents", - "run_count": 3, - "extra_cli_args": [], - "assets": { - "hackernews-01.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", - "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" - }, - "hackernews-02.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", - "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" - }, - "hackernews-03.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", - "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" - }, - "hackernews-04.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", - "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" - }, - "hackernews-05.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", - "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" + "name": "hackernews.add_new_documents", + "run_count": 3, + "extra_cli_args": [], + "assets": { + "hackernews-01.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", + "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" + }, + "hackernews-02.ndjson": { + 
"local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", + "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" + }, + "hackernews-03.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", + "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" + }, + "hackernews-04.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", + "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" + }, + "hackernews-05.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", + "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time", + "text" + ], + "searchableAttributes": [ + "title", + "text" + ], + "filterableAttributes": [ + "by", + "kids", + "parent" + ], + "sortableAttributes": [ + "score", + "time" + ] } + }, + "synchronous": "WaitForTask" }, - "precommands": [ - { - "route": "indexes/movies/settings", - "method": "PATCH", - "body": { - "inline": { - "displayedAttributes": [ - "title", - "by", - "score", - "time", - "text" - ], - "searchableAttributes": [ - "title", - "text" - ], - "filterableAttributes": [ - "by", - "kids", - "parent" - ], - "sortableAttributes": [ - "score", - "time" - ] - } - }, - "synchronous": "WaitForTask" + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01.ndjson" }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-02.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-03.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-04.ndjson" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ { "route": "indexes/movies/documents", "method": "POST", "body": { - "asset": "hackernews-01.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-02.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-03.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-04.ndjson" + "asset": "hackernews-05.ndjson" }, "synchronous": "WaitForTask" } - ], - "commands": [ - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-05.ndjson" - }, - "synchronous": "WaitForTask" - } - ] - } - \ No newline at end of file + ] +} diff --git a/workloads/hackernews-modify-facet-numbers.json b/workloads/hackernews-modify-facet-numbers.json index 84d94969b..c0726aedd 100644 --- a/workloads/hackernews-modify-facet-numbers.json +++ b/workloads/hackernews-modify-facet-numbers.json @@ -1,111 
+1,111 @@ { - "name": "hackernews.modify_facet_numbers", - "run_count": 3, - "extra_cli_args": [], - "assets": { - "hackernews-01.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", - "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" - }, - "hackernews-02.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", - "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" - }, - "hackernews-03.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", - "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" - }, - "hackernews-04.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", - "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" - }, - "hackernews-05.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", - "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" - }, - "hackernews-02-modified-filters.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson", - "sha256": "1fcb6f89ddeff51c3fe7b86b3574f894ff9859a76cf056ab7e7dacc72970dabb" - } + "name": "hackernews.modify_facet_numbers", + "run_count": 3, + "extra_cli_args": [], + "assets": { + "hackernews-01.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", + "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" }, - "precommands": [ - { - "route": "indexes/movies/settings", - "method": "PATCH", - "body": { - "inline": { - "displayedAttributes": [ - "title", - "by", - "score", - "time", - "text" - ], - "searchableAttributes": [ - "title", - "text" - ], - "filterableAttributes": [ - "by", - "kids", - "parent" - ], - "sortableAttributes": [ - "score", - "time" - ] - } - }, - "synchronous": "WaitForTask" + "hackernews-02.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", + "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" + }, + "hackernews-03.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", + "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" + }, + "hackernews-04.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", + "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" + }, + "hackernews-05.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", + "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" + }, + 
"hackernews-02-modified-filters.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson", + "sha256": "1fcb6f89ddeff51c3fe7b86b3574f894ff9859a76cf056ab7e7dacc72970dabb" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time", + "text" + ], + "searchableAttributes": [ + "title", + "text" + ], + "filterableAttributes": [ + "by", + "kids", + "parent" + ], + "sortableAttributes": [ + "score", + "time" + ] + } }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-02.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-03.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-04.ndjson" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ { "route": "indexes/movies/documents", "method": "POST", "body": { - "asset": "hackernews-01.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-02.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-03.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-04.ndjson" + "asset": "hackernews-01-modified-filters.ndjson" }, "synchronous": "WaitForTask" } - ], - "commands": [ - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-01-modified-filters.ndjson" - }, - "synchronous": "WaitForTask" - } - ] - } + ] +} \ No newline at end of file diff --git a/workloads/hackernews-modify-facet-strings.json b/workloads/hackernews-modify-facet-strings.json index f912558e8..7c5eb2e70 100644 --- a/workloads/hackernews-modify-facet-strings.json +++ b/workloads/hackernews-modify-facet-strings.json @@ -1,111 +1,111 @@ { - "name": "hackernews.modify_facet_strings", - "run_count": 3, - "extra_cli_args": [], - "assets": { - "hackernews-01.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", - "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" - }, - "hackernews-02.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", - "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" - }, - "hackernews-03.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", - "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" - }, - "hackernews-04.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", - 
"sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" - }, - "hackernews-05.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", - "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" - }, - "hackernews-01-modified-filters.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson", - "sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c" - } + "name": "hackernews.modify_facet_strings", + "run_count": 3, + "extra_cli_args": [], + "assets": { + "hackernews-01.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", + "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" }, - "precommands": [ - { - "route": "indexes/movies/settings", - "method": "PATCH", - "body": { - "inline": { - "displayedAttributes": [ - "title", - "by", - "score", - "time", - "text" - ], - "searchableAttributes": [ - "title", - "text" - ], - "filterableAttributes": [ - "by", - "kids", - "parent" - ], - "sortableAttributes": [ - "score", - "time" - ] - } - }, - "synchronous": "WaitForTask" + "hackernews-02.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", + "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" + }, + "hackernews-03.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", + "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" + }, + "hackernews-04.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", + "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" + }, + "hackernews-05.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", + "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" + }, + "hackernews-01-modified-filters.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-filters.ndjson", + "sha256": "b80c245ce1b1df80b9b38800f677f3bd11947ebc62716fb108269d50e796c35c" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time", + "text" + ], + "searchableAttributes": [ + "title", + "text" + ], + "filterableAttributes": [ + "by", + "kids", + "parent" + ], + "sortableAttributes": [ + "score", + "time" + ] + } }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-02.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": 
"hackernews-03.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-04.ndjson" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ { "route": "indexes/movies/documents", "method": "POST", "body": { - "asset": "hackernews-01.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-02.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-03.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-04.ndjson" + "asset": "hackernews-01-modified-filters.ndjson" }, "synchronous": "WaitForTask" } - ], - "commands": [ - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-01-modified-filters.ndjson" - }, - "synchronous": "WaitForTask" - } - ] - } - \ No newline at end of file + ] +} + \ No newline at end of file diff --git a/workloads/hackernews-modify-searchables.json b/workloads/hackernews-modify-searchables.json index 0f674ece0..248026f19 100644 --- a/workloads/hackernews-modify-searchables.json +++ b/workloads/hackernews-modify-searchables.json @@ -1,71 +1,113 @@ { - "name": "hackernews.modify_searchables", - "run_count": 3, - "extra_cli_args": [], - "assets": { - "hackernews-01.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", - "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" - }, - "hackernews-02.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", - "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" - }, - "hackernews-03.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", - "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" - }, - "hackernews-04.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", - "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" - }, - "hackernews-05.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", - "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" - }, - "hackernews-01-modified-searchables.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-searchables.ndjson", - "sha256": "e5c08710c6af70031ac7212e0ba242c72ef29c8d4e1fce66c789544641452a7c" - }, - "hackernews-02-modified-searchables.ndjson": { - "local_location": null, - "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-searchables.ndjson", - "sha256": "098b029851117087b1e26ccb7ac408eda9bba54c3008213a2880d6fab607346e" - } + "name": "hackernews.modify_searchables", + "run_count": 3, + "extra_cli_args": [], + "assets": { + 
"hackernews-01.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01.ndjson", + "sha256": "cd3627b86c064d865b6754848ed0e73ef1d8142752a25e5f0765c3a1296dd3ae" }, - "precommands": [ + "hackernews-02.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02.ndjson", + "sha256": "5d533b83bcf992201dace88b4d0c0be8b4df5225c6c4b763582d986844bcc23b" + }, + "hackernews-03.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/03.ndjson", + "sha256": "f5f351a0d04a8a83643ace12cafa2b7ec8ca8cb7d46fd268e5126492a6c66f2a" + }, + "hackernews-04.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/04.ndjson", + "sha256": "ac1915ee7ce53a6718548c255a6cc59969784b2570745dc5b739f714beda291a" + }, + "hackernews-05.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/05.ndjson", + "sha256": "be31d5632602f798e62d1c10c83bdfda2b4deaa068477eacde05fdd247572b82" + }, + "hackernews-01-modified-searchables.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/01-modified-searchables.ndjson", + "sha256": "e5c08710c6af70031ac7212e0ba242c72ef29c8d4e1fce66c789544641452a7c" + }, + "hackernews-02-modified-searchables.ndjson": { + "local_location": null, + "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-searchables.ndjson", + "sha256": "098b029851117087b1e26ccb7ac408eda9bba54c3008213a2880d6fab607346e" + } + }, + "precommands": [ + { + "route": "indexes/movies/settings", + "method": "PATCH", + "body": { + "inline": { + "displayedAttributes": [ + "title", + "by", + "score", + "time", + "text" + ], + "searchableAttributes": [ + "title", + "text" + ], + "filterableAttributes": [ + "by", + "kids", + "parent" + ], + "sortableAttributes": [ + "score", + "time" + ] + } + }, + "synchronous": "WaitForTask" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-01.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-02.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-03.ndjson" + }, + "synchronous": "WaitForResponse" + }, + { + "route": "indexes/movies/documents", + "method": "POST", + "body": { + "asset": "hackernews-04.ndjson" + }, + "synchronous": "WaitForTask" + } + ], + "commands": [ { - "route": "indexes/movies/settings", - "method": "PATCH", + "route": "indexes/movies/documents", + "method": "POST", "body": { - "inline": { - "displayedAttributes": [ - "title", - "by", - "score", - "time", - "text" - ], - "searchableAttributes": [ - "title", - "text" - ], - "filterableAttributes": [ - "by", - "kids", - "parent" - ], - "sortableAttributes": [ - "score", - "time" - ] - } + "asset": "hackernews-01-modified-searchables.ndjson" }, "synchronous": "WaitForTask" }, @@ -73,52 +115,9 @@ "route": "indexes/movies/documents", "method": "POST", "body": { - 
"asset": "hackernews-01.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-02.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-03.ndjson" - }, - "synchronous": "WaitForResponse" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-04.ndjson" + "asset": "hackernews-02-modified-searchables.ndjson" }, "synchronous": "WaitForTask" } - ], - "commands": [ - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-01-modified-searchables.ndjson" - }, - "synchronous": "WaitForTask" - }, - { - "route": "indexes/movies/documents", - "method": "POST", - "body": { - "asset": "hackernews-02-modified-searchables.ndjson" - }, - "synchronous": "WaitForTask" - } - ] - } - \ No newline at end of file + ] +} From 261d2ceb06553465115419afed01dc4b0cbbf848 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 Dec 2024 14:16:40 +0100 Subject: [PATCH 076/158] Yield the BBQueue writer instead of spin looping --- crates/milli/src/update/new/channel.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/crates/milli/src/update/new/channel.rs b/crates/milli/src/update/new/channel.rs index ebd0ba429..7590c02ac 100644 --- a/crates/milli/src/update/new/channel.rs +++ b/crates/milli/src/update/new/channel.rs @@ -661,6 +661,11 @@ where if sender.is_disconnected() { return Err(Error::InternalError(InternalError::AbortedIndexation)); } + + // We prefer to yield and allow the writing thread + // to do its job, especially beneficial when there + // is only one CPU core available. + std::thread::yield_now(); } } From fc1df5793cb5fa8d462081f0e4f1dad511a8746a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 4 Dec 2024 14:35:20 +0100 Subject: [PATCH 077/158] fix tests --- crates/meilisearch/tests/search/facet_search.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 418cb4da4..23f312490 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -224,10 +224,10 @@ async fn add_documents_and_deactivate_facet_search() { assert_eq!(code, 400, "{}", response); snapshot!(response, @r###" { - "message": "Facet search is disabled for this index", - "code": "invalid_search_disabled_facet_search", + "message": "The facet search is disabled for this index", + "code": "invalid_facet_search_disabled", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_disabled_facet_search" + "link": "https://docs.meilisearch.com/errors#invalid_facet_search_disabled" } "###); } @@ -255,10 +255,10 @@ async fn deactivate_facet_search_and_add_documents() { assert_eq!(code, 400, "{}", response); snapshot!(response, @r###" { - "message": "Facet search is disabled for this index", - "code": "invalid_search_disabled_facet_search", + "message": "The facet search is disabled for this index", + "code": "invalid_facet_search_disabled", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_disabled_facet_search" + "link": "https://docs.meilisearch.com/errors#invalid_facet_search_disabled" } "###); } From 7458f0386c2259add91977f653c3d741d6809e08 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 4 Dec 2024 
14:44:57 +0100 Subject: [PATCH 078/158] fix asset name --- workloads/hackernews-modify-facet-numbers.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workloads/hackernews-modify-facet-numbers.json b/workloads/hackernews-modify-facet-numbers.json index c0726aedd..59ade0561 100644 --- a/workloads/hackernews-modify-facet-numbers.json +++ b/workloads/hackernews-modify-facet-numbers.json @@ -102,7 +102,7 @@ "route": "indexes/movies/documents", "method": "POST", "body": { - "asset": "hackernews-01-modified-filters.ndjson" + "asset": "hackernews-02-modified-filters.ndjson" }, "synchronous": "WaitForTask" } From bf742d81cfb66c2e98e5b26b046d8708421574c2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 4 Dec 2024 14:47:02 +0100 Subject: [PATCH 079/158] add a test --- crates/index-scheduler/src/lib.rs | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index d6de9c758..5e0e4f97a 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -4319,10 +4319,35 @@ mod tests { let proc = index_scheduler.processing_tasks.read().unwrap().clone(); let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() }; - let (batches, _) = index_scheduler - .get_batch_ids_from_authorized_indexes(&rtxn, &proc, &query, &AuthFilter::default()) + let (mut batches, _) = index_scheduler + .get_batches_from_authorized_indexes(query.clone(), &AuthFilter::default()) .unwrap(); - snapshot!(snapshot_bitmap(&batches), @"[0,]"); // only the processing batch in the first tick + assert_eq!(batches.len(), 1); + batches[0].started_at = OffsetDateTime::UNIX_EPOCH; + // Insta cannot snapshot our batches because the batch stats contains an enum as key: https://github.com/mitsuhiko/insta/issues/689 + let batch = serde_json::to_string_pretty(&batches[0]).unwrap(); + snapshot!(batch, @r#" + { + "uid": 0, + "details": { + "primaryKey": "mouse" + }, + "stats": { + "totalNbTasks": 2, + "status": { + "enqueued": 2 + }, + "types": { + "indexCreation": 2 + }, + "indexUids": { + "catto": 2 + } + }, + "startedAt": "1970-01-01T00:00:00Z", + "finishedAt": null + } + "#); let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() }; let (batches, _) = index_scheduler From cbcf6c9ba371614de222d03344cba7ab84ed7ab4 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 4 Dec 2024 14:48:48 +0100 Subject: [PATCH 080/158] make the processing tasks as processing in a batch --- crates/index-scheduler/src/lib.rs | 2 +- crates/index-scheduler/src/utils.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index 5e0e4f97a..2d953fc6e 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -4335,7 +4335,7 @@ mod tests { "stats": { "totalNbTasks": 2, "status": { - "enqueued": 2 + "processing": 2 }, "types": { "indexCreation": 2 diff --git a/crates/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs index fc41d535c..356d77b35 100644 --- a/crates/index-scheduler/src/utils.rs +++ b/crates/index-scheduler/src/utils.rs @@ -67,7 +67,7 @@ impl ProcessingBatch { task.batch_uid = Some(self.uid); // We don't store the statuses in the map since they're all enqueued but we must // still store them in the stats since that can be displayed. 
- *self.stats.status.entry(task.status).or_default() += 1; + *self.stats.status.entry(Status::Processing).or_default() += 1; self.kinds.insert(task.kind.as_kind()); *self.stats.types.entry(task.kind.as_kind()).or_default() += 1; From 8388698993050ed95c7ba9411590d7ec052c11b8 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 4 Dec 2024 15:09:10 +0100 Subject: [PATCH 081/158] Fix dat hash --- workloads/hackernews-modify-facet-numbers.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workloads/hackernews-modify-facet-numbers.json b/workloads/hackernews-modify-facet-numbers.json index 59ade0561..f4171442f 100644 --- a/workloads/hackernews-modify-facet-numbers.json +++ b/workloads/hackernews-modify-facet-numbers.json @@ -31,7 +31,7 @@ "hackernews-02-modified-filters.ndjson": { "local_location": null, "remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/modification/02-modified-filters.ndjson", - "sha256": "1fcb6f89ddeff51c3fe7b86b3574f894ff9859a76cf056ab7e7dacc72970dabb" + "sha256": "7272cbfd41110d32d7fe168424a0000f07589bfe40f664652b34f4f20aaf3802" } }, "precommands": [ From cb0c3a5aad0f3fa2ffcfe51a5a59480f8d3049ee Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 4 Dec 2024 15:43:05 +0100 Subject: [PATCH 082/158] stop adding one enqueued tasks to all unprioritized batches --- crates/index-scheduler/src/batch.rs | 3 +-- crates/index-scheduler/src/lib.rs | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index ce86c10ca..fc6fb194c 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -496,8 +496,7 @@ impl IndexScheduler { // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; - let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; - current_batch.processing(Some(&mut task)); + let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; // If the task is not associated with any index, verify that it is an index swap and // create the batch directly. 
Otherwise, get the index name associated with the task diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index 2d953fc6e..9715e9e2f 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -4333,15 +4333,15 @@ mod tests { "primaryKey": "mouse" }, "stats": { - "totalNbTasks": 2, + "totalNbTasks": 1, "status": { - "processing": 2 + "processing": 1 }, "types": { - "indexCreation": 2 + "indexCreation": 1 }, "indexUids": { - "catto": 2 + "catto": 1 } }, "startedAt": "1970-01-01T00:00:00Z", From 7a2af06b1ec31b8b1dbd7918ecd3b655b0c31aa6 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 4 Dec 2024 15:52:24 +0100 Subject: [PATCH 083/158] update the impacted snapshots --- .../lib.rs/cancel_mix_of_tasks/aborted_indexation.snap | 2 +- .../processing_second_task_cancel_enqueued.snap | 2 +- .../lib.rs/cancel_processing_dump/cancel_registered.snap | 2 +- .../lib.rs/cancel_processing_task/aborted_indexation.snap | 2 +- .../lib.rs/cancel_processing_task/cancel_task_registered.snap | 2 +- .../lib.rs/cancel_processing_task/initial_task_processing.snap | 2 +- .../lib.rs/document_addition/after_the_batch_creation.snap | 2 +- .../document_addition_batch_created.snap | 2 +- .../after_batch_succeeded.snap | 2 +- .../after_failing_to_commit.snap | 2 +- .../after_batch_creation.snap | 2 +- .../registered_the_second_task.snap | 2 +- .../registered_the_third_task.snap | 2 +- .../lib.rs/query_batches_simple/after-advancing-a-bit.snap | 2 +- .../lib.rs/swap_indexes/third_empty_swap_processed.snap | 3 +-- .../task_deletion_undeleteable/task_deletion_processing.snap | 2 +- 16 files changed, 16 insertions(+), 17 deletions(-) diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap index 9710c4911..b73714e36 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(1): [1,] -{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, } +{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap index e70aa0850..c24c36313 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch 
Some(1): [1,] -{uid: 1, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"beavero":2}}, } +{uid: 1, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"beavero":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap index 55c7b3ed2..b9f33e598 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"dumpCreation":1},"indexUids":{}}, } +{uid: 0, details: {"dumpUid":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"dumpCreation":1},"indexUids":{}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap index 91b4deb22..0b9a0d709 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, } +{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap index 89e8c8c6f..fef6c20f6 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true 
### Processing batch Some(0): [0,] -{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, } +{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap index 12e1b1283..3f45be007 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/initial_task_processing.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"catto":2}}, } +{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"catto":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap b/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap index f7eaa6df8..8beb49145 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/document_addition/after_the_batch_creation.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, } +{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap 
b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap index f7eaa6df8..8beb49145 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_batch_created.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, } +{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap index 0091af65b..8ab4d84dd 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_batch_succeeded.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, } +{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap index 0091af65b..8ab4d84dd 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_update_task_after_process_batch_success_for_document_addition/after_failing_to_commit.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: 
{"receivedDocuments":2,"indexedDocuments":null}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"documentAdditionOrUpdate":2},"indexUids":{"doggos":2}}, } +{uid: 0, details: {"receivedDocuments":1,"indexedDocuments":null}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap index aafef2fce..9d3f29c48 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/after_batch_creation.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, } +{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap index 86fea2386..322bcf4ab 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_second_task.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, } +{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap index ea910f491..aa047e3ff 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap +++ 
b/crates/index-scheduler/src/snapshots/lib.rs/insert_task_while_another_task_is_processing/registered_the_third_task.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [0,] -{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"index_a":2}}, } +{uid: 0, details: {"primaryKey":"id"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"index_a":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { primary_key: Some("id") }, kind: IndexCreation { index_uid: "index_a", primary_key: Some("id") }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap b/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap index 869e38e57..bf5d0528c 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/query_batches_simple/after-advancing-a-bit.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(1): [1,] -{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":2,"status":{"enqueued":2},"types":{"indexCreation":2},"indexUids":{"doggo":2}}, } +{uid: 1, details: {"primaryKey":"sheep"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, batch_uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap index 77b1193a5..0f126b33a 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap @@ -89,7 +89,7 @@ succeeded [0,1,2,3,4,5,6,] ---------------------------------------------------------------------- ### Batches Kind: "indexCreation" [0,1,2,3,] -"indexSwap" [4,5,6,] +"indexSwap" [4,5,] ---------------------------------------------------------------------- ### Batches Index Tasks: a [0,4,5,] @@ -104,7 +104,6 @@ d [3,4,] [timestamp] [3,] [timestamp] [4,] [timestamp] [5,] -[timestamp] [6,] ---------------------------------------------------------------------- ### Batches Started At: [timestamp] [0,] diff --git a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap index fce223c6c..85a0afc46 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap @@ -5,7 +5,7 @@ snapshot_kind: text ### Autobatching Enabled = true ### Processing batch Some(0): [3,] -{uid: 0, details: {"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"enqueued":1},"types":{"taskDeletion":1},"indexUids":{}}, } +{uid: 0, details: 
{"matchedTasks":2,"deletedTasks":null,"originalFilter":"test_query"}, stats: {"totalNbTasks":1,"status":{"processing":1},"types":{"taskDeletion":1},"indexUids":{}}, } ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} From 739c52a3cdc420f929e45ce6189f18d624dc904f Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 4 Dec 2024 16:16:48 +0100 Subject: [PATCH 084/158] Replace HashSets by BTreeSets for the prefixes --- .../milli/src/update/new/word_fst_builder.rs | 12 +++---- .../src/update/new/words_prefix_docids.rs | 36 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/crates/milli/src/update/new/word_fst_builder.rs b/crates/milli/src/update/new/word_fst_builder.rs index 6bc72d91d..a9a5222be 100644 --- a/crates/milli/src/update/new/word_fst_builder.rs +++ b/crates/milli/src/update/new/word_fst_builder.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::BTreeSet; use std::io::BufWriter; use fst::{Set, SetBuilder, Streamer}; @@ -75,8 +75,8 @@ pub struct PrefixData { #[derive(Debug)] pub struct PrefixDelta { - pub modified: HashSet, - pub deleted: HashSet, + pub modified: BTreeSet, + pub deleted: BTreeSet, } struct PrefixFstBuilder { @@ -86,7 +86,7 @@ struct PrefixFstBuilder { prefix_fst_builders: Vec>>, current_prefix: Vec, current_prefix_count: Vec, - modified_prefixes: HashSet, + modified_prefixes: BTreeSet, current_prefix_is_modified: Vec, } @@ -110,7 +110,7 @@ impl PrefixFstBuilder { prefix_fst_builders, current_prefix: vec![Prefix::new(); max_prefix_length], current_prefix_count: vec![0; max_prefix_length], - modified_prefixes: HashSet::new(), + modified_prefixes: BTreeSet::new(), current_prefix_is_modified: vec![false; max_prefix_length], }) } @@ -180,7 +180,7 @@ impl PrefixFstBuilder { let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? }; let new_prefix_fst = Set::new(&prefix_fst_mmap)?; let old_prefix_fst = index.words_prefixes_fst(rtxn)?; - let mut deleted_prefixes = HashSet::new(); + let mut deleted_prefixes = BTreeSet::new(); { let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference(); while let Some(prefix) = deleted_prefixes_stream.next() { diff --git a/crates/milli/src/update/new/words_prefix_docids.rs b/crates/milli/src/update/new/words_prefix_docids.rs index 7e56beeae..bf64049c3 100644 --- a/crates/milli/src/update/new/words_prefix_docids.rs +++ b/crates/milli/src/update/new/words_prefix_docids.rs @@ -1,5 +1,5 @@ use std::cell::RefCell; -use std::collections::HashSet; +use std::collections::BTreeSet; use std::io::{BufReader, BufWriter, Read, Seek, Write}; use hashbrown::HashMap; @@ -37,8 +37,8 @@ impl WordPrefixDocids { fn execute( self, wtxn: &mut heed::RwTxn, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, ) -> Result<()> { delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?; self.recompute_modified_prefixes(wtxn, prefix_to_compute) @@ -48,7 +48,7 @@ impl WordPrefixDocids { fn recompute_modified_prefixes( &self, wtxn: &mut RwTxn, - prefixes: &HashSet, + prefixes: &BTreeSet, ) -> Result<()> { // We fetch the docids associated to the newly added word prefix fst only. // And collect the CboRoaringBitmaps pointers in an HashMap. 
@@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> { pub fn from_prefixes( database: Database, rtxn: &'rtxn RoTxn, - prefixes: &'a HashSet, + prefixes: &'a BTreeSet, ) -> heed::Result { let database = database.remap_data_type::(); @@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids { fn execute( self, wtxn: &mut heed::RwTxn, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, ) -> Result<()> { delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?; self.recompute_modified_prefixes(wtxn, prefix_to_compute) @@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids { fn recompute_modified_prefixes( &self, wtxn: &mut RwTxn, - prefixes: &HashSet, + prefixes: &BTreeSet, ) -> Result<()> { // We fetch the docids associated to the newly added word prefix fst only. // And collect the CboRoaringBitmaps pointers in an HashMap. @@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> { pub fn from_prefixes( database: Database, rtxn: &'rtxn RoTxn, - prefixes: &'a HashSet, + prefixes: &'a BTreeSet, ) -> heed::Result { let database = database.remap_data_type::(); @@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {} fn delete_prefixes( wtxn: &mut RwTxn, prefix_database: &Database, - prefixes: &HashSet, + prefixes: &BTreeSet, ) -> Result<()> { // We remove all the entries that are no more required in this word prefix docids database. for prefix in prefixes { @@ -309,8 +309,8 @@ fn delete_prefixes( pub fn compute_word_prefix_docids( wtxn: &mut RwTxn, index: &Index, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, grenad_parameters: GrenadParameters, ) -> Result<()> { WordPrefixDocids::new( @@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids( pub fn compute_exact_word_prefix_docids( wtxn: &mut RwTxn, index: &Index, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, grenad_parameters: GrenadParameters, ) -> Result<()> { WordPrefixDocids::new( @@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids( pub fn compute_word_prefix_fid_docids( wtxn: &mut RwTxn, index: &Index, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, grenad_parameters: GrenadParameters, ) -> Result<()> { WordPrefixIntegerDocids::new( @@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids( pub fn compute_word_prefix_position_docids( wtxn: &mut RwTxn, index: &Index, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, grenad_parameters: GrenadParameters, ) -> Result<()> { WordPrefixIntegerDocids::new( From 29ef1645305b5b1f1d37011fec05f7c2b8ca66f7 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 Dec 2024 16:33:35 +0100 Subject: [PATCH 085/158] Introduce a new semi ordered merge function --- crates/milli/src/update/new/extract/cache.rs | 110 +++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index be077d142..ae5ade17e 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -566,6 +566,116 @@ where Ok(()) } +/// Merges the caches that must be all associated to the same bucket. 
+///
+/// It merges entries like the `merge_caches` function
+pub fn merge_caches_alt<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
+where
+    F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
+{
+    let mut maps = Vec::new();
+    let mut readers = Vec::new();
+    let mut current_bucket = None;
+    for FrozenCache { bucket, cache, ref mut spilled } in frozen {
+        assert_eq!(*current_bucket.get_or_insert(bucket), bucket);
+        maps.push(cache);
+        readers.append(spilled);
+    }
+
+    // First manage the spilled entries by looking into the HashMaps,
+    // merge them and mark them as dummy.
+    let mut heap = BinaryHeap::new();
+    for (source_index, source) in readers.into_iter().enumerate() {
+        let mut cursor = source.into_cursor()?;
+        if cursor.move_on_next()?.is_some() {
+            heap.push(Entry { cursor, source_index });
+        }
+    }
+
+    loop {
+        let mut first_entry = match heap.pop() {
+            Some(entry) => entry,
+            None => break,
+        };
+
+        let (first_key, first_value) = match first_entry.cursor.current() {
+            Some((key, value)) => (key, value),
+            None => break,
+        };
+
+        let mut output = DelAddRoaringBitmap::from_bytes(first_value)?;
+        while let Some(mut entry) = heap.peek_mut() {
+            if let Some((key, value)) = entry.cursor.current() {
+                if first_key == key {
+                    let new = DelAddRoaringBitmap::from_bytes(value)?;
+                    output = output.merge(new);
+                    // When we are done with the current value of this entry we make
+                    // it move forward and let the heap reorganize itself (on drop)
+                    if entry.cursor.move_on_next()?.is_none() {
+                        PeekMut::pop(entry);
+                    }
+                } else {
+                    break;
+                }
+            }
+        }
+
+        // Once we merged all of the spilled bitmaps we must also
+        // fetch the entries from the non-spilled entries (the HashMaps).
+        for (map_index, map) in maps.iter_mut().enumerate() {
+            if first_entry.source_index != map_index {
+                if let Some(new) = map.get_mut(first_key) {
+                    output.union_and_clear_bbbul(new);
+                }
+            }
+        }
+
+        // We send the merged entry outside.
+        (f)(first_key, output)?;
+
+        // Don't forget to put the first entry back into the heap.
+        if first_entry.cursor.move_on_next()?.is_some() {
+            heap.push(first_entry)
+        }
+    }
+
+    // Then manage the content on the HashMap entries that weren't taken (mem::take).
+    let order_count = 1000;
+    while let Some(mut map) = maps.pop() {
+        let mut iter = map.iter_mut();
+
+        loop {
+            let mut ordered_buffer: Vec<_> = iter.by_ref().take(order_count).collect();
+            ordered_buffer.sort_unstable_by_key(|(key, _)| *key);
+
+            if ordered_buffer.is_empty() {
+                break;
+            }
+
+            for (key, bbbul) in ordered_buffer.drain(..) {
+                // Make sure we don't try to work with entries already managed by the spilled
+                if bbbul.is_empty() {
+                    continue;
+                }
+
+                let mut output = DelAddRoaringBitmap::empty();
+                output.union_and_clear_bbbul(bbbul);
+
+                for rhs in maps.iter_mut() {
+                    if let Some(new) = rhs.get_mut(key) {
+                        output.union_and_clear_bbbul(new);
+                    }
+                }
+
+                // We send the merged entry outside.
+ (f)(key, output)?; + } + } + } + + Ok(()) +} + struct Entry { cursor: ReaderCursor, source_index: usize, From be411435f5248531f9b5b7891016e5e7304d5a83 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 Dec 2024 16:37:29 +0100 Subject: [PATCH 086/158] Use the merge_caches_alt function in the docids merging --- crates/milli/src/update/new/extract/mod.rs | 5 ++++- crates/milli/src/update/new/merger.rs | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/crates/milli/src/update/new/extract/mod.rs b/crates/milli/src/update/new/extract/mod.rs index e67f70db1..3601dd9c6 100644 --- a/crates/milli/src/update/new/extract/mod.rs +++ b/crates/milli/src/update/new/extract/mod.rs @@ -6,7 +6,10 @@ mod searchable; mod vectors; use bumpalo::Bump; -pub use cache::{merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap}; +pub use cache::{ + merge_caches, merge_caches_alt, transpose_and_freeze_caches, BalancedCaches, + DelAddRoaringBitmap, +}; pub use documents::*; pub use faceted::*; pub use geo::*; diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index b650b6b53..9f2aae5a8 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -9,8 +9,8 @@ use roaring::RoaringBitmap; use super::channel::*; use super::extract::{ - merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind, - GeoExtractorData, + merge_caches, merge_caches_alt, transpose_and_freeze_caches, BalancedCaches, + DelAddRoaringBitmap, FacetKind, GeoExtractorData, }; use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result}; @@ -78,7 +78,7 @@ where if must_stop_processing() { return Err(InternalError::AbortedIndexation.into()); } - merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| { + merge_caches_alt(frozen, |key, DelAddRoaringBitmap { del, add }| { let current = database.get(&rtxn, key)?; match merge_cbo_bitmaps(current, del, add)? { Operation::Write(bitmap) => { From cb99ac6f7eddef97bb4386987b3151ecd40219f4 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 Dec 2024 17:00:22 +0100 Subject: [PATCH 087/158] Consume vec instead of draining --- crates/milli/src/update/new/extract/cache.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index ae5ade17e..b57ba6b9b 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -652,7 +652,7 @@ where break; } - for (key, bbbul) in ordered_buffer.drain(..) { + for (key, bbbul) in ordered_buffer { // Make sure we don't try to work with entries already managed by the spilled if bbbul.is_empty() { continue; From 2e32d0474ccc846bbe86c0bbafd88368f82e8a3e Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 Dec 2024 17:05:07 +0100 Subject: [PATCH 088/158] Lexicographically sort all the map to merge --- crates/milli/src/update/new/extract/cache.rs | 38 +++++++------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index b57ba6b9b..325a72280 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -640,36 +640,24 @@ where } // Then manage the content on the HashMap entries that weren't taken (mem::take). 
- let order_count = 1000; while let Some(mut map) = maps.pop() { - let mut iter = map.iter_mut(); + // Make sure we don't try to work with entries already managed by the spilled + let mut ordered_entries: Vec<_> = + map.iter_mut().filter(|(_, bbbul)| !bbbul.is_empty()).collect(); + ordered_entries.sort_unstable_by_key(|(key, _)| *key); - loop { - let mut ordered_buffer: Vec<_> = iter.by_ref().take(order_count).collect(); - ordered_buffer.sort_unstable_by_key(|(key, _)| *key); + for (key, bbbul) in ordered_entries { + let mut output = DelAddRoaringBitmap::empty(); + output.union_and_clear_bbbul(bbbul); - if ordered_buffer.is_empty() { - break; + for rhs in maps.iter_mut() { + if let Some(new) = rhs.get_mut(key) { + output.union_and_clear_bbbul(new); + } } - for (key, bbbul) in ordered_buffer { - // Make sure we don't try to work with entries already managed by the spilled - if bbbul.is_empty() { - continue; - } - - let mut output = DelAddRoaringBitmap::empty(); - output.union_and_clear_bbbul(bbbul); - - for rhs in maps.iter_mut() { - if let Some(new) = rhs.get_mut(key) { - output.union_and_clear_bbbul(new); - } - } - - // We send the merged entry outside. - (f)(key, output)?; - } + // We send the merged entry outside. + (f)(key, output)?; } } From 2da5584bb555a564c382774bd4ad03ae39184ddb Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 Dec 2024 17:39:07 +0100 Subject: [PATCH 089/158] Make the tasks pulling timeout configurable --- crates/xtask/src/bench/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/xtask/src/bench/mod.rs b/crates/xtask/src/bench/mod.rs index fdb2c4963..891742528 100644 --- a/crates/xtask/src/bench/mod.rs +++ b/crates/xtask/src/bench/mod.rs @@ -82,6 +82,10 @@ pub struct BenchDeriveArgs { /// Reason for the benchmark invocation #[arg(short, long)] reason: Option, + + /// The maximum time in seconds we allow for fetching the task queue before timing out. + #[arg(long, default_value_t = 60)] + tasks_queue_timeout_secs: u64, } pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { @@ -127,7 +131,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { let meili_client = Client::new( Some("http://127.0.0.1:7700".into()), args.master_key.as_deref(), - Some(std::time::Duration::from_secs(60)), + Some(std::time::Duration::from_secs(args.tasks_queue_timeout_secs)), )?; // enter runtime From d0c4e6da6bceb7d079c1a29ac5d95d796a63810c Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 4 Dec 2024 14:32:45 +0100 Subject: [PATCH 090/158] Make clippy happy --- crates/xtask/src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/xtask/src/main.rs b/crates/xtask/src/main.rs index b81424666..942362f4f 100644 --- a/crates/xtask/src/main.rs +++ b/crates/xtask/src/main.rs @@ -16,6 +16,7 @@ struct ListFeaturesDeriveArgs { #[command(author, version, about, long_about)] #[command(name = "cargo xtask")] #[command(bin_name = "cargo xtask")] +#[allow(clippy::large_enum_variant)] // please, that's enough... 
enum Command { ListFeatures(ListFeaturesDeriveArgs), Bench(BenchDeriveArgs), From 5f896b1050ebef939ab68b8ba569193278d61ebb Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 4 Dec 2024 17:51:12 +0100 Subject: [PATCH 091/158] Fix geo when spilling --- .../milli/src/update/new/extract/geo/mod.rs | 28 +++++++++++-------- crates/milli/src/update/new/merger.rs | 4 +-- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/crates/milli/src/update/new/extract/geo/mod.rs b/crates/milli/src/update/new/extract/geo/mod.rs index 09d2ce0f8..a3820609d 100644 --- a/crates/milli/src/update/new/extract/geo/mod.rs +++ b/crates/milli/src/update/new/extract/geo/mod.rs @@ -1,6 +1,6 @@ use std::cell::RefCell; use std::fs::File; -use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _}; +use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Seek as _, Write as _}; use std::{iter, mem, result}; use bumpalo::Bump; @@ -97,30 +97,34 @@ pub struct FrozenGeoExtractorData<'extractor> { impl<'extractor> FrozenGeoExtractorData<'extractor> { pub fn iter_and_clear_removed( &mut self, - ) -> impl IntoIterator> + '_ { - mem::take(&mut self.removed) + ) -> io::Result> + '_> { + Ok(mem::take(&mut self.removed) .iter() .copied() .map(Ok) - .chain(iterator_over_spilled_geopoints(&mut self.spilled_removed)) + .chain(iterator_over_spilled_geopoints(&mut self.spilled_removed)?)) } pub fn iter_and_clear_inserted( &mut self, - ) -> impl IntoIterator> + '_ { - mem::take(&mut self.inserted) + ) -> io::Result> + '_> { + Ok(mem::take(&mut self.inserted) .iter() .copied() .map(Ok) - .chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted)) + .chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted)?)) } } fn iterator_over_spilled_geopoints( spilled: &mut Option>, -) -> impl IntoIterator> + '_ { +) -> io::Result> + '_> { let mut spilled = spilled.take(); - iter::from_fn(move || match &mut spilled { + if let Some(spilled) = &mut spilled { + spilled.rewind()?; + } + + Ok(iter::from_fn(move || match &mut spilled { Some(file) => { let geopoint_bytes = &mut [0u8; mem::size_of::()]; match file.read_exact(geopoint_bytes) { @@ -130,7 +134,7 @@ fn iterator_over_spilled_geopoints( } } None => None, - }) + })) } impl<'extractor> Extractor<'extractor> for GeoExtractor { @@ -157,7 +161,9 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor { let mut data_ref = context.data.borrow_mut_or_yield(); for change in changes { - if max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm) { + if data_ref.spilled_removed.is_none() + && max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm) + { // We must spill as we allocated too much memory data_ref.spilled_removed = tempfile::tempfile().map(BufWriter::new).map(Some)?; data_ref.spilled_inserted = tempfile::tempfile().map(BufWriter::new).map(Some)?; diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index b650b6b53..512e094fb 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -34,7 +34,7 @@ where } let mut frozen = data.into_inner().freeze()?; - for result in frozen.iter_and_clear_removed() { + for result in frozen.iter_and_clear_removed()? { let extracted_geo_point = result?; let removed = rtree.remove(&GeoPoint::from(extracted_geo_point)); debug_assert!(removed.is_some()); @@ -42,7 +42,7 @@ where debug_assert!(removed); } - for result in frozen.iter_and_clear_inserted() { + for result in frozen.iter_and_clear_inserted()? 
{ let extracted_geo_point = result?; rtree.insert(GeoPoint::from(extracted_geo_point)); let inserted = faceted.insert(extracted_geo_point.docid); From 3a11e39c010d474129e1c4816c61d9f96bdead00 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 4 Dec 2024 17:52:53 +0100 Subject: [PATCH 092/158] Force max_memory to a min of 100MiB --- crates/milli/src/update/new/indexer/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 383823de1..9ee7577a5 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -88,7 +88,9 @@ where // 2% of the indexing memory let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity); let new_grenad_parameters = GrenadParameters { - max_memory: Some(max_memory.saturating_sub(total_bbbuffer_capacity)), + max_memory: Some( + max_memory.saturating_sub(total_bbbuffer_capacity).max(100 * 1024 * 1024), + ), ..grenad_parameters }; (new_grenad_parameters, total_bbbuffer_capacity) From 52843123d49d5b8a7903a9c6f95ae584f7e87a8c Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 5 Dec 2024 10:03:05 +0100 Subject: [PATCH 093/158] Clean up and remove the non-sorted merge_caches function --- crates/milli/src/update/new/extract/cache.rs | 103 +------------------ crates/milli/src/update/new/extract/mod.rs | 3 +- crates/milli/src/update/new/merger.rs | 8 +- 3 files changed, 8 insertions(+), 106 deletions(-) diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index 325a72280..658a3127c 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -466,110 +466,13 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>( Ok(bucket_caches) } -/// Merges the caches that must be all associated to the same bucket. +/// Merges the caches that must be all associated to the same bucket +/// but make sure to sort the different buckets before performing the merges. /// /// # Panics /// /// - If the bucket IDs in these frozen caches are not exactly the same. -pub fn merge_caches(frozen: Vec, mut f: F) -> Result<()> -where - F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>, -{ - let mut maps = Vec::new(); - let mut readers = Vec::new(); - let mut current_bucket = None; - for FrozenCache { bucket, cache, ref mut spilled } in frozen { - assert_eq!(*current_bucket.get_or_insert(bucket), bucket); - maps.push(cache); - readers.append(spilled); - } - - // First manage the spilled entries by looking into the HashMaps, - // merge them and mark them as dummy. 
- let mut heap = BinaryHeap::new(); - for (source_index, source) in readers.into_iter().enumerate() { - let mut cursor = source.into_cursor()?; - if cursor.move_on_next()?.is_some() { - heap.push(Entry { cursor, source_index }); - } - } - - loop { - let mut first_entry = match heap.pop() { - Some(entry) => entry, - None => break, - }; - - let (first_key, first_value) = match first_entry.cursor.current() { - Some((key, value)) => (key, value), - None => break, - }; - - let mut output = DelAddRoaringBitmap::from_bytes(first_value)?; - while let Some(mut entry) = heap.peek_mut() { - if let Some((key, _value)) = entry.cursor.current() { - if first_key == key { - let new = DelAddRoaringBitmap::from_bytes(first_value)?; - output = output.merge(new); - // When we are done we the current value of this entry move make - // it move forward and let the heap reorganize itself (on drop) - if entry.cursor.move_on_next()?.is_none() { - PeekMut::pop(entry); - } - } else { - break; - } - } - } - - // Once we merged all of the spilled bitmaps we must also - // fetch the entries from the non-spilled entries (the HashMaps). - for (map_index, map) in maps.iter_mut().enumerate() { - if first_entry.source_index != map_index { - if let Some(new) = map.get_mut(first_key) { - output.union_and_clear_bbbul(new); - } - } - } - - // We send the merged entry outside. - (f)(first_key, output)?; - - // Don't forget to put the first entry back into the heap. - if first_entry.cursor.move_on_next()?.is_some() { - heap.push(first_entry) - } - } - - // Then manage the content on the HashMap entries that weren't taken (mem::take). - while let Some(mut map) = maps.pop() { - for (key, bbbul) in map.iter_mut() { - // Make sure we don't try to work with entries already managed by the spilled - if bbbul.is_empty() { - continue; - } - - let mut output = DelAddRoaringBitmap::empty(); - output.union_and_clear_bbbul(bbbul); - - for rhs in maps.iter_mut() { - if let Some(new) = rhs.get_mut(key) { - output.union_and_clear_bbbul(new); - } - } - - // We send the merged entry outside. - (f)(key, output)?; - } - } - - Ok(()) -} - -/// Merges the caches that must be all associated to the same bucket. 
-/// -/// It merges entries like the `merge_caches` function -pub fn merge_caches_alt(frozen: Vec, mut f: F) -> Result<()> +pub fn merge_caches_sorted(frozen: Vec, mut f: F) -> Result<()> where F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>, { diff --git a/crates/milli/src/update/new/extract/mod.rs b/crates/milli/src/update/new/extract/mod.rs index 3601dd9c6..0bdf31635 100644 --- a/crates/milli/src/update/new/extract/mod.rs +++ b/crates/milli/src/update/new/extract/mod.rs @@ -7,8 +7,7 @@ mod vectors; use bumpalo::Bump; pub use cache::{ - merge_caches, merge_caches_alt, transpose_and_freeze_caches, BalancedCaches, - DelAddRoaringBitmap, + merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, }; pub use documents::*; pub use faceted::*; diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index 9f2aae5a8..85f5a70f7 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -9,8 +9,8 @@ use roaring::RoaringBitmap; use super::channel::*; use super::extract::{ - merge_caches, merge_caches_alt, transpose_and_freeze_caches, BalancedCaches, - DelAddRoaringBitmap, FacetKind, GeoExtractorData, + merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, + FacetKind, GeoExtractorData, }; use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result}; @@ -78,7 +78,7 @@ where if must_stop_processing() { return Err(InternalError::AbortedIndexation.into()); } - merge_caches_alt(frozen, |key, DelAddRoaringBitmap { del, add }| { + merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| { let current = database.get(&rtxn, key)?; match merge_cbo_bitmaps(current, del, add)? { Operation::Write(bitmap) => { @@ -107,7 +107,7 @@ pub fn merge_and_send_facet_docids<'extractor>( .map(|frozen| { let mut facet_field_ids_delta = FacetFieldIdsDelta::default(); let rtxn = index.read_txn()?; - merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| { + merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| { let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?; match merge_cbo_bitmaps(current, del, add)? { Operation::Write(bitmap) => { From 9020a50df89de88d79528663869562d892d1ad4f Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 5 Dec 2024 10:14:46 +0100 Subject: [PATCH 094/158] Change the default max memory usage to 5% of the total memory --- crates/meilisearch/src/option.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/meilisearch/src/option.rs b/crates/meilisearch/src/option.rs index 7e87a5a2c..7c59f0607 100644 --- a/crates/meilisearch/src/option.rs +++ b/crates/meilisearch/src/option.rs @@ -654,8 +654,9 @@ impl Opt { #[derive(Debug, Default, Clone, Parser, Deserialize)] pub struct IndexerOpts { - /// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch - /// uses no more than two thirds of available memory. + /// Specifies the maximum resident memory that Meilisearch can use for indexing. + /// By default, Meilisearch limits the RAM usage to 5% of the total available memory. + /// Note that the underlying store utilizes memory-mapping and makes use of the rest. #[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] #[serde(default)] pub max_indexing_memory: MaxMemory, @@ -714,7 +715,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig { } } -/// A type used to detect the max memory available and use 2/3 of it. 
+/// A type used to detect the max resident memory available and use 5% of it. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct MaxMemory(Option); @@ -728,7 +729,7 @@ impl FromStr for MaxMemory { impl Default for MaxMemory { fn default() -> MaxMemory { - MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64)) + MaxMemory(total_memory_bytes().map(|bytes| bytes * 5 / 100).map(Byte::from_u64)) } } From 95975944d70ff23ade0210218a09aed6a05f3dbb Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 5 Dec 2024 14:23:38 +0100 Subject: [PATCH 095/158] fix the dumps missing the empty swap index tasks --- crates/index-scheduler/src/batch.rs | 3 ++- .../lib.rs/swap_indexes/third_empty_swap_processed.snap | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index fc6fb194c..cc730e286 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -496,7 +496,7 @@ impl IndexScheduler { // 5. We make a batch from the unprioritised tasks. Start by taking the next enqueued task. let task_id = if let Some(task_id) = enqueued.min() { task_id } else { return Ok(None) }; - let task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; + let mut task = self.get_task(rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; // If the task is not associated with any index, verify that it is an index swap and // create the batch directly. Otherwise, get the index name associated with the task @@ -506,6 +506,7 @@ impl IndexScheduler { index_name } else { assert!(matches!(&task.kind, KindWithContent::IndexSwap { swaps } if swaps.is_empty())); + current_batch.processing(Some(&mut task)); return Ok(Some((Batch::IndexSwap { task }, current_batch))); }; diff --git a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap index 0f126b33a..77b1193a5 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/swap_indexes/third_empty_swap_processed.snap @@ -89,7 +89,7 @@ succeeded [0,1,2,3,4,5,6,] ---------------------------------------------------------------------- ### Batches Kind: "indexCreation" [0,1,2,3,] -"indexSwap" [4,5,] +"indexSwap" [4,5,6,] ---------------------------------------------------------------------- ### Batches Index Tasks: a [0,4,5,] @@ -104,6 +104,7 @@ d [3,4,] [timestamp] [3,] [timestamp] [4,] [timestamp] [5,] +[timestamp] [6,] ---------------------------------------------------------------------- ### Batches Started At: [timestamp] [0,] From 214b51de879d977e6c167287195ad59185c05a75 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 5 Dec 2024 14:45:54 +0100 Subject: [PATCH 096/158] try to fix the snapshot on demand flaky test --- crates/meilisearch/tests/common/mod.rs | 19 +++++++++++++++++++ crates/meilisearch/tests/snapshot/mod.rs | 8 ++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/crates/meilisearch/tests/common/mod.rs b/crates/meilisearch/tests/common/mod.rs index 3aae2fe80..44385752e 100644 --- a/crates/meilisearch/tests/common/mod.rs +++ b/crates/meilisearch/tests/common/mod.rs @@ -52,6 +52,25 @@ impl Value { } self } + + /// Return `true` if the `status` field is set to `failed`. + /// Panic if the `status` field doesn't exists. 
+ #[track_caller] + pub fn is_fail(&self) -> bool { + if !self["status"].is_string() { + panic!("Called `is_fail` on {}", serde_json::to_string_pretty(&self.0).unwrap()); + } + self["status"] == serde_json::Value::String(String::from("failed")) + } + + // Panic if the json doesn't contain the `status` field set to "succeeded" + #[track_caller] + pub fn failed(&self) -> &Self { + if !self.is_fail() { + panic!("Called failed on {}", serde_json::to_string_pretty(&self.0).unwrap()); + } + self + } } impl From for Value { diff --git a/crates/meilisearch/tests/snapshot/mod.rs b/crates/meilisearch/tests/snapshot/mod.rs index 976551190..0d569fc7c 100644 --- a/crates/meilisearch/tests/snapshot/mod.rs +++ b/crates/meilisearch/tests/snapshot/mod.rs @@ -129,11 +129,11 @@ async fn perform_on_demand_snapshot() { index.load_test_set().await; - server.index("doggo").create(Some("bone")).await; - index.wait_task(2).await; + let (task, _) = server.index("doggo").create(Some("bone")).await; + index.wait_task(task.uid()).await.succeeded(); - server.index("doggo").create(Some("bone")).await; - index.wait_task(2).await; + let (task, _) = server.index("doggo").create(Some("bone")).await; + index.wait_task(task.uid()).await.failed(); let (task, code) = server.create_snapshot().await; snapshot!(code, @"202 Accepted"); From a0a3b55700aaf6bd633d57b494d53ce66df0bf72 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 5 Dec 2024 14:48:29 +0100 Subject: [PATCH 097/158] Change error code --- crates/meilisearch-types/src/error.rs | 2 +- crates/meilisearch/src/search/mod.rs | 2 +- crates/meilisearch/tests/search/facet_search.rs | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index c68059682..afc876b42 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -279,7 +279,7 @@ InvalidSearchPage , InvalidRequest , BAD_REQUEST ; InvalidSearchQ , InvalidRequest , BAD_REQUEST ; InvalidFacetSearchQuery , InvalidRequest , BAD_REQUEST ; InvalidFacetSearchName , InvalidRequest , BAD_REQUEST ; -InvalidFacetSearchDisabled , InvalidRequest , BAD_REQUEST ; +FacetSearchDisabled , InvalidRequest , BAD_REQUEST ; InvalidSearchVector , InvalidRequest , BAD_REQUEST ; InvalidSearchShowMatchesPosition , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScore , InvalidRequest , BAD_REQUEST ; diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 9e0c936b7..7beaad6a5 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -1410,7 +1410,7 @@ pub fn perform_facet_search( if !index.facet_search(&rtxn)? 
{ return Err(ResponseError::from_msg( "The facet search is disabled for this index".to_string(), - Code::InvalidFacetSearchDisabled, + Code::FacetSearchDisabled, )); } diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 23f312490..19224c3df 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -225,9 +225,9 @@ async fn add_documents_and_deactivate_facet_search() { snapshot!(response, @r###" { "message": "The facet search is disabled for this index", - "code": "invalid_facet_search_disabled", + "code": "facet_search_disabled", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_facet_search_disabled" + "link": "https://docs.meilisearch.com/errors#facet_search_disabled" } "###); } @@ -256,9 +256,9 @@ async fn deactivate_facet_search_and_add_documents() { snapshot!(response, @r###" { "message": "The facet search is disabled for this index", - "code": "invalid_facet_search_disabled", + "code": "facet_search_disabled", "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_facet_search_disabled" + "link": "https://docs.meilisearch.com/errors#facet_search_disabled" } "###); } From c77073efcca508df72eede587401fd94235cfd4c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 5 Dec 2024 15:50:12 +0100 Subject: [PATCH 098/158] Update::has_changed_for_fields --- .../milli/src/update/new/document_change.rs | 79 ++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/crates/milli/src/update/new/document_change.rs b/crates/milli/src/update/new/document_change.rs index 899655db1..1644b2254 100644 --- a/crates/milli/src/update/new/document_change.rs +++ b/crates/milli/src/update/new/document_change.rs @@ -1,7 +1,10 @@ use bumpalo::Bump; use heed::RoTxn; -use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions}; +use super::document::{ + Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions, +}; +use super::extract::perm_json_p; use super::vector_document::{ MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions, }; @@ -164,6 +167,80 @@ impl<'doc> Update<'doc> { } } + /// Returns whether the updated version of the document is different from the current version for the passed subset of fields. + /// + /// `true` if at least one top-level-field that is a exactly a member of field or a parent of a member of field changed. + /// Otherwise `false`. 
+ pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>( + &self, + fields: Option<&[&str]>, + rtxn: &'t RoTxn, + index: &'t Index, + mapper: &'t Mapper, + ) -> Result { + let mut changed = false; + let mut cached_current = None; + let mut updated_selected_field_count = 0; + + for entry in self.updated().iter_top_level_fields() { + let (key, updated_value) = entry?; + + if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { + continue; + } + + updated_selected_field_count += 1; + let current = match cached_current { + Some(current) => current, + None => self.current(rtxn, index, mapper)?, + }; + let current_value = current.top_level_field(key)?; + let Some(current_value) = current_value else { + changed = true; + break; + }; + + if current_value.get() != updated_value.get() { + changed = true; + break; + } + cached_current = Some(current); + } + + if !self.has_deletion { + // no field deletion, so fields that don't appear in `updated` cannot have changed + return Ok(changed); + } + + if changed { + return Ok(true); + } + + // we saw all updated fields, and set `changed` if any field wasn't in `current`. + // so if there are as many fields in `current` as in `updated`, then nothing changed. + // If there is any more fields in `current`, then they are missing in `updated`. + let has_deleted_fields = { + let current = match cached_current { + Some(current) => current, + None => self.current(rtxn, index, mapper)?, + }; + + let mut current_selected_field_count = 0; + for entry in current.iter_top_level_fields() { + let (key, _) = entry?; + + if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { + continue; + } + current_selected_field_count += 1; + } + + current_selected_field_count != updated_selected_field_count + }; + + Ok(has_deleted_fields) + } + pub fn updated_vectors( &self, doc_alloc: &'doc Bump, From c77b00d3ac9c9ed893ae7be940a57eebd3efd338 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 5 Dec 2024 15:51:58 +0100 Subject: [PATCH 099/158] Don't extract word docids when no searchable changed --- .../new/extract/searchable/extract_word_docids.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs index 05e2374dc..39f67e417 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -8,8 +8,9 @@ use bumpalo::Bump; use heed::RoTxn; use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; +use crate::update::new::document::Document as _; use crate::update::new::extract::cache::BalancedCaches; -use crate::update::new::extract::perm_json_p::contained_in; +use crate::update::new::extract::perm_json_p::{self, contained_in}; use crate::update::new::indexer::document_changes::{ extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, }; @@ -351,6 +352,15 @@ impl WordDocidsExtractors { )?; } DocumentChange::Update(inner) => { + if !inner.has_changed_for_fields( + document_tokenizer.attribute_to_extract, + &context.rtxn, + context.index, + context.db_fields_ids_map, + )? 
{ + return Ok(()); + } + let mut token_fn = |fname: &str, fid, pos, word: &str| { cached_sorter.insert_del_u32( fid, From 2b74d1824bca13d92757433391fd0f7ad0fabb43 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 5 Dec 2024 15:56:22 +0100 Subject: [PATCH 100/158] Ignore documents that didn't change any field in word pair proximity --- .../searchable/extract_word_pair_proximity_docids.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs index dcd9e3a78..e58c0efd2 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs @@ -70,6 +70,15 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { )?; } DocumentChange::Update(inner) => { + if !inner.has_changed_for_fields( + document_tokenizer.attribute_to_extract, + rtxn, + index, + context.db_fields_ids_map, + )? { + return Ok(()); + } + let document = inner.current(rtxn, index, context.db_fields_ids_map)?; process_document_tokens( document, From fa8b9acdf6aa932d9a5421ff4f4347f39b280a5e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 5 Dec 2024 16:12:52 +0100 Subject: [PATCH 101/158] Ignore documents that didn't change in facets --- .../src/update/new/extract/faceted/extract_facets.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index f2132ce38..b865d0a35 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -97,6 +97,15 @@ impl FacetedDocidsExtractor { }, ), DocumentChange::Update(inner) => { + if !inner.has_changed_for_fields( + Some(attributes_to_extract), + rtxn, + index, + context.db_fields_ids_map, + )? 
{ + return Ok(()); + } + extract_document_facets( attributes_to_extract, inner.current(rtxn, index, context.db_fields_ids_map)?, From bd5110a2fed4c36f8d57c8eba2de94367e750c96 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 5 Dec 2024 16:13:07 +0100 Subject: [PATCH 102/158] Fix clippy warnings --- .../src/update/new/extract/searchable/extract_word_docids.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs index 39f67e417..06fb747c6 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -8,9 +8,8 @@ use bumpalo::Bump; use heed::RoTxn; use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; -use crate::update::new::document::Document as _; use crate::update::new::extract::cache::BalancedCaches; -use crate::update::new::extract::perm_json_p::{self, contained_in}; +use crate::update::new::extract::perm_json_p::contained_in; use crate::update::new::indexer::document_changes::{ extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, }; From 95ed07976146fdab8a3be7000ce9cc1adbc9d726 Mon Sep 17 00:00:00 2001 From: airycanon Date: Fri, 22 Nov 2024 14:11:56 +0800 Subject: [PATCH 103/158] attach index name in errors # Conflicts: # crates/index-scheduler/src/batch.rs # Conflicts: # crates/index-scheduler/src/batch.rs # crates/meilisearch/src/search/mod.rs --- crates/index-scheduler/src/error.rs | 15 +++++++++---- .../src/index_mapper/index_map.rs | 4 ++-- .../index-scheduler/src/index_mapper/mod.rs | 13 ++++++------ crates/index-scheduler/src/lib.rs | 21 ++++++++++++------- crates/meilisearch/src/error.rs | 16 +++++++++++--- crates/meilisearch/src/lib.rs | 3 ++- .../src/routes/indexes/facet_search.rs | 2 +- crates/meilisearch/src/routes/indexes/mod.rs | 4 ++-- .../meilisearch/src/routes/indexes/search.rs | 14 +++++++------ .../meilisearch/src/routes/indexes/similar.rs | 2 +- crates/meilisearch/src/routes/multi_search.rs | 6 ++++-- crates/meilisearch/src/search/federated.rs | 7 ++++--- 12 files changed, 68 insertions(+), 39 deletions(-) diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index f6a4ecc04..82388172e 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -122,8 +122,11 @@ pub enum Error { Dump(#[from] dump::Error), #[error(transparent)] Heed(#[from] heed::Error), - #[error(transparent)] - Milli(#[from] milli::Error), + #[error("{}", match .index_name { + Some(name) if !name.is_empty() => format!("Index `{}`: {error}", name), + _ => format!("{error}") + })] + Milli { error: milli::Error, index_name: Option }, #[error("An unexpected crash occurred when processing the task.")] ProcessBatchPanicked, #[error(transparent)] @@ -190,7 +193,7 @@ impl Error { | Error::AbortedTask | Error::Dump(_) | Error::Heed(_) - | Error::Milli(_) + | Error::Milli { .. 
} | Error::ProcessBatchPanicked | Error::FileStore(_) | Error::IoError(_) @@ -209,6 +212,10 @@ impl Error { pub fn with_custom_error_code(self, code: Code) -> Self { Self::WithCustomErrorCode(code, Box::new(self)) } + + pub fn from_milli(error: milli::Error, index_name: Option) -> Self { + Self::Milli { error, index_name } + } } impl ErrorCode for Error { @@ -236,7 +243,7 @@ impl ErrorCode for Error { // TODO: not sure of the Code to use Error::NoSpaceLeftInTaskQueue => Code::NoSpaceLeftOnDevice, Error::Dump(e) => e.error_code(), - Error::Milli(e) => e.error_code(), + Error::Milli { error, .. } => error.error_code(), Error::ProcessBatchPanicked => Code::Internal, Error::Heed(e) => e.error_code(), Error::HeedTransaction(e) => e.error_code(), diff --git a/crates/index-scheduler/src/index_mapper/index_map.rs b/crates/index-scheduler/src/index_mapper/index_map.rs index f8080d23b..c20782068 100644 --- a/crates/index-scheduler/src/index_mapper/index_map.rs +++ b/crates/index-scheduler/src/index_mapper/index_map.rs @@ -3,13 +3,13 @@ use std::path::Path; use std::time::Duration; use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions}; -use meilisearch_types::milli::Index; +use meilisearch_types::milli::{Index, Result}; use time::OffsetDateTime; use uuid::Uuid; use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing}; use crate::lru::{InsertionOutcome, LruMap}; -use crate::{clamp_to_page_size, Result}; +use crate::{clamp_to_page_size}; /// Keep an internally consistent view of the open indexes in memory. /// diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index 3cccb5a69..500e4cf83 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use tracing::error; use uuid::Uuid; - +use meilisearch_types::milli; use self::index_map::IndexMap; use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; use crate::uuid_codec::UuidCodec; @@ -121,7 +121,7 @@ impl IndexStats { /// # Parameters /// /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`. - pub fn new(index: &Index, rtxn: &RoTxn) -> Result { + pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result { Ok(IndexStats { number_of_documents: index.number_of_documents(rtxn)?, database_size: index.on_disk_size()?, @@ -189,7 +189,7 @@ impl IndexMapper { date, self.enable_mdb_writemap, self.index_base_map_size, - )?; + ).map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; wtxn.commit()?; @@ -357,7 +357,8 @@ impl IndexMapper { }; let index_path = self.base_path.join(uuid.to_string()); // take the lock to reopen the environment. 
- reopen.reopen(&mut self.index_map.write().unwrap(), &index_path)?; + reopen.reopen(&mut self.index_map.write().unwrap(), &index_path) + .map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; continue; } BeingDeleted => return Err(Error::IndexNotFound(name.to_string())), @@ -378,7 +379,7 @@ impl IndexMapper { None, self.enable_mdb_writemap, self.index_base_map_size, - )?; + ).map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; } Available(index) => break index, Closing(_) => { @@ -459,7 +460,7 @@ impl IndexMapper { None => { let index = self.index(rtxn, index_uid)?; let index_rtxn = index.read_txn()?; - IndexStats::new(&index, &index_rtxn) + IndexStats::new(&index, &index_rtxn).map_err(|e| Error::from_milli(e, Some(uuid.to_string()))) } } } diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index 9405ecf24..6147f788f 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -1678,9 +1678,9 @@ impl IndexScheduler { tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks."); } // If we have an abortion error we must stop the tick here and re-schedule tasks. - Err(Error::Milli(milli::Error::InternalError( - milli::InternalError::AbortedIndexation, - ))) + Err(Error::Milli{ + error: milli::Error::InternalError(milli::InternalError::AbortedIndexation), .. + }) | Err(Error::AbortedTask) => { #[cfg(test)] self.breakpoint(Breakpoint::AbortedIndexation); @@ -1699,9 +1699,9 @@ impl IndexScheduler { // 2. close the associated environment // 3. resize it // 4. re-schedule tasks - Err(Error::Milli(milli::Error::UserError( - milli::UserError::MaxDatabaseSizeReached, - ))) if index_uid.is_some() => { + Err(Error::Milli { + error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached), .. + }) if index_uid.is_some() => { // fixme: add index_uid to match to avoid the unwrap let index_uid = index_uid.unwrap(); // fixme: handle error more gracefully? not sure when this could happen @@ -1943,6 +1943,7 @@ impl IndexScheduler { // TODO: consider using a type alias or a struct embedder/template pub fn embedders( &self, + index_uid: String, embedding_configs: Vec, ) -> Result { let res: Result<_> = embedding_configs @@ -1954,7 +1955,10 @@ impl IndexScheduler { .. }| { let prompt = - Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?); + Arc::new(prompt.try_into() + .map_err(meilisearch_types::milli::Error::from) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))? 
+ ); // optimistically return existing embedder { let embedders = self.embedders.read().unwrap(); @@ -1970,7 +1974,8 @@ impl IndexScheduler { let embedder = Arc::new( Embedder::new(embedder_options.clone()) .map_err(meilisearch_types::milli::vector::Error::from) - .map_err(meilisearch_types::milli::Error::from)?, + .map_err(meilisearch_types::milli::Error::from) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?, ); { let mut embedders = self.embedders.write().unwrap(); diff --git a/crates/meilisearch/src/error.rs b/crates/meilisearch/src/error.rs index 5c4ce171f..6e7283a18 100644 --- a/crates/meilisearch/src/error.rs +++ b/crates/meilisearch/src/error.rs @@ -7,6 +7,7 @@ use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; use meilisearch_types::milli::OrderBy; use serde_json::Value; use tokio::task::JoinError; +use meilisearch_types::milli; #[derive(Debug, thiserror::Error)] pub enum MeilisearchHttpError { @@ -62,8 +63,11 @@ pub enum MeilisearchHttpError { HeedError(#[from] meilisearch_types::heed::Error), #[error(transparent)] IndexScheduler(#[from] index_scheduler::Error), - #[error(transparent)] - Milli(#[from] meilisearch_types::milli::Error), + #[error("{}", match .index_name { + Some(name) if !name.is_empty() => format!("Index `{}`: {error}", name), + _ => format!("{error}") + })] + Milli { error: meilisearch_types::milli::Error, index_name: Option }, #[error(transparent)] Payload(#[from] PayloadError), #[error(transparent)] @@ -76,6 +80,12 @@ pub enum MeilisearchHttpError { MissingSearchHybrid, } +impl MeilisearchHttpError { + pub(crate) fn from_milli(error: milli::Error, index_name: Option) -> Self { + Self::Milli { error, index_name } + } +} + impl ErrorCode for MeilisearchHttpError { fn error_code(&self) -> Code { match self { @@ -95,7 +105,7 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::SerdeJson(_) => Code::Internal, MeilisearchHttpError::HeedError(_) => Code::Internal, MeilisearchHttpError::IndexScheduler(e) => e.error_code(), - MeilisearchHttpError::Milli(e) => e.error_code(), + MeilisearchHttpError::Milli{error, ..} => error.error_code(), MeilisearchHttpError::Payload(e) => e.error_code(), MeilisearchHttpError::FileStore(_) => Code::Internal, MeilisearchHttpError::DocumentFormat(e) => e.error_code(), diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index 633ad2776..779af63f2 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -395,6 +395,7 @@ fn import_dump( for index_reader in dump_reader.indexes()? 
{ let mut index_reader = index_reader?; let metadata = index_reader.metadata(); + let uid = metadata.uid.clone(); tracing::info!("Importing index `{}`.", metadata.uid); let date = Some((metadata.created_at, metadata.updated_at)); @@ -432,7 +433,7 @@ fn import_dump( let reader = DocumentsBatchReader::from_reader(reader)?; let embedder_configs = index.embedding_configs(&wtxn)?; - let embedders = index_scheduler.embedders(embedder_configs)?; + let embedders = index_scheduler.embedders(uid, embedder_configs)?; let builder = milli::update::IndexDocuments::new( &mut wtxn, diff --git a/crates/meilisearch/src/routes/indexes/facet_search.rs b/crates/meilisearch/src/routes/indexes/facet_search.rs index 99a4a4f28..fc29d3406 100644 --- a/crates/meilisearch/src/routes/indexes/facet_search.rs +++ b/crates/meilisearch/src/routes/indexes/facet_search.rs @@ -185,7 +185,7 @@ pub async fn search( let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); - let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?; + let search_kind = search_kind(&search_query, &index_scheduler, index_uid.to_string(), &index, features)?; let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_facet_search( diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs index 7d073ec5f..1dda27a98 100644 --- a/crates/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -5,7 +5,7 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::{DeserializeError, Deserr, ValuePointerRef}; -use index_scheduler::IndexScheduler; +use index_scheduler::{Error, IndexScheduler}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; @@ -107,7 +107,7 @@ pub async fn list_indexes( if !filters.is_index_authorized(uid) { return Ok(None); } - Ok(Some(IndexView::new(uid.to_string(), index)?)) + Ok(Some(IndexView::new(uid.to_string(), index).map_err(|e| Error::from_milli(e, Some(uid.to_string())))?)) })?; // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened. 
let indexes: Vec = indexes.into_iter().flatten().collect(); diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index 2f5cb4a36..609439b4a 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -243,11 +243,11 @@ pub async fn search_with_url_query( let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); - let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; + let search_kind = search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?; let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features()) + perform_search(index_uid.to_string(), &index, query, search_kind, retrieve_vector, index_scheduler.features()) }) .await; permit.drop().await; @@ -287,12 +287,12 @@ pub async fn search_with_post( let features = index_scheduler.features(); - let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features)?; + let search_kind = search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features()) + perform_search(index_uid.to_string(), &index, query, search_kind, retrieve_vectors, index_scheduler.features()) }) .await; permit.drop().await; @@ -314,6 +314,7 @@ pub async fn search_with_post( pub fn search_kind( query: &SearchQuery, index_scheduler: &IndexScheduler, + index_uid: String, index: &milli::Index, features: RoFeatures, ) -> Result { @@ -332,7 +333,7 @@ pub fn search_kind( (None, _, None) => Ok(SearchKind::KeywordOnly), // hybrid.semantic_ratio == 1.0 => vector (_, Some(HybridQuery { semantic_ratio, embedder }), v) if **semantic_ratio == 1.0 => { - SearchKind::semantic(index_scheduler, index, embedder, v.map(|v| v.len())) + SearchKind::semantic(index_scheduler, index_uid, index, embedder, v.map(|v| v.len())) } // hybrid.semantic_ratio == 0.0 => keyword (_, Some(HybridQuery { semantic_ratio, embedder: _ }), _) if **semantic_ratio == 0.0 => { @@ -340,13 +341,14 @@ pub fn search_kind( } // no query, hybrid, vector => semantic (None, Some(HybridQuery { semantic_ratio: _, embedder }), Some(v)) => { - SearchKind::semantic(index_scheduler, index, embedder, Some(v.len())) + SearchKind::semantic(index_scheduler, index_uid, index, embedder, Some(v.len())) } // query, no hybrid, no vector => keyword (Some(_), None, None) => Ok(SearchKind::KeywordOnly), // query, hybrid, maybe vector => hybrid (Some(_), Some(HybridQuery { semantic_ratio, embedder }), v) => SearchKind::hybrid( index_scheduler, + index_uid, index, embedder, **semantic_ratio, diff --git a/crates/meilisearch/src/routes/indexes/similar.rs b/crates/meilisearch/src/routes/indexes/similar.rs index 79f42f0aa..a0fccff52 100644 --- a/crates/meilisearch/src/routes/indexes/similar.rs +++ b/crates/meilisearch/src/routes/indexes/similar.rs @@ -104,7 +104,7 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; let (embedder_name, embedder, quantized) = - 
SearchKind::embedder(&index_scheduler, &index, &query.embedder, None)?; + SearchKind::embedder(&index_scheduler, index_uid.to_string(), &index, &query.embedder, None)?; tokio::task::spawn_blocking(move || { perform_similar( diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index f8b1bc6ee..c4496e41c 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -125,14 +125,16 @@ pub async fn multi_search_with_post( }) .with_index(query_index)?; + let index_uid_str = index_uid.to_string(); + let search_kind = - search_kind(&query, index_scheduler.get_ref(), &index, features) + search_kind(&query, index_scheduler.get_ref(), index_uid_str.clone(), &index, features) .with_index(query_index)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features) .with_index(query_index)?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vector, features) + perform_search(index_uid_str.clone(), &index, query, search_kind, retrieve_vector, features) }) .await .with_index(query_index)?; diff --git a/crates/meilisearch/src/search/federated.rs b/crates/meilisearch/src/search/federated.rs index 5279c26bb..5aae82c66 100644 --- a/crates/meilisearch/src/search/federated.rs +++ b/crates/meilisearch/src/search/federated.rs @@ -560,7 +560,7 @@ pub fn perform_federated_search( // use an immediately invoked lambda to capture the result without returning from the function let res: Result<(), ResponseError> = (|| { - let search_kind = search_kind(&query, index_scheduler, &index, features)?; + let search_kind = search_kind(&query, index_scheduler, index_uid.to_string(), &index, features)?; let canonicalization_kind = match (&search_kind, &query.q) { (SearchKind::SemanticOnly { .. 
}, _) => { @@ -636,7 +636,7 @@ pub fn perform_federated_search( search.offset(0); search.limit(required_hit_count); - let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?; + let (result, _semantic_hit_count) = super::search_from_kind(index_uid.to_string(), search_kind, search)?; let format = AttributesFormat { attributes_to_retrieve: query.attributes_to_retrieve, retrieve_vectors, @@ -670,7 +670,8 @@ pub fn perform_federated_search( let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); - let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?; + let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder) + .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?; results_by_query.push(SearchResultByQuery { federation_options, From b75f1f4c17c3cd8608227ea11219018feca69960 Mon Sep 17 00:00:00 2001 From: airycanon Date: Fri, 22 Nov 2024 14:19:20 +0800 Subject: [PATCH 104/158] fix tests # Conflicts: # crates/index-scheduler/src/batch.rs # crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap # crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fifth_task_succeeds.snap # crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_bad_primary_key/fourth_task_fails.snap # crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/second_task_fails.snap # crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key/third_task_fails.snap # crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_multiple_primary_key_batch_wrong_key/second_and_third_tasks_fails.snap # crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/all_other_tasks_succeeds.snap # crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/second_task_fails.snap # crates/index-scheduler/src/snapshots/lib.rs/test_document_addition_with_set_and_null_primary_key_inference_works/third_task_succeeds.snap # Conflicts: # crates/index-scheduler/src/batch.rs # crates/meilisearch/src/search/mod.rs # crates/meilisearch/tests/vector/mod.rs # Conflicts: # crates/index-scheduler/src/batch.rs --- crates/index-scheduler/src/batch.rs | 210 ++++++++++-------- crates/index-scheduler/src/error.rs | 23 +- .../src/index_mapper/index_map.rs | 3 +- .../index-scheduler/src/index_mapper/mod.rs | 51 +++-- crates/index-scheduler/src/lib.rs | 24 +- .../after_removing_the_documents.snap | 4 +- crates/meilisearch/src/error.rs | 6 +- .../src/routes/indexes/facet_search.rs | 3 +- crates/meilisearch/src/routes/indexes/mod.rs | 5 +- .../meilisearch/src/routes/indexes/search.rs | 24 +- .../meilisearch/src/routes/indexes/similar.rs | 9 +- crates/meilisearch/src/routes/multi_search.rs | 20 +- crates/meilisearch/src/search/federated.rs | 12 +- crates/meilisearch/src/search/mod.rs | 55 +++-- .../tests/documents/add_documents.rs | 32 +-- crates/meilisearch/tests/documents/errors.rs | 4 +- .../meilisearch/tests/index/update_index.rs | 2 +- crates/meilisearch/tests/search/errors.rs | 104 ++++----- crates/meilisearch/tests/search/multi.rs | 12 +- crates/meilisearch/tests/tasks/mod.rs | 2 +- .../tests/vector/binary_quantized.rs | 2 +- crates/meilisearch/tests/vector/mod.rs | 24 +- crates/meilisearch/tests/vector/openai.rs | 4 +- 
crates/meilisearch/tests/vector/rest.rs | 24 +- 24 files changed, 378 insertions(+), 281 deletions(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index cc730e286..9a3ba4929 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -29,7 +29,6 @@ use bumpalo::collections::CollectIn; use bumpalo::Bump; use dump::IndexMetadata; use meilisearch_types::batches::BatchId; -use meilisearch_types::error::Code; use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; use meilisearch_types::milli::heed::CompactionOption; @@ -689,7 +688,9 @@ impl IndexScheduler { let index = self.index_mapper.index(&rtxn, name)?; let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); fs::create_dir_all(&dst)?; - index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; + index + .copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled) + .map_err(|e| Error::from_milli(e, Some(name.to_string())))?; } drop(rtxn); @@ -791,16 +792,19 @@ impl IndexScheduler { let content_file = self.file_store.get_update(content_file)?; let reader = DocumentsBatchReader::from_reader(content_file) - .map_err(milli::Error::from)?; + .map_err(|e| Error::from_milli(e.into(), None))?; let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index(); - while let Some(doc) = - cursor.next_document().map_err(milli::Error::from)? + while let Some(doc) = cursor + .next_document() + .map_err(|e| Error::from_milli(e.into(), None))? { - dump_content_file - .push_document(&obkv_to_object(doc, &documents_batch_index)?)?; + dump_content_file.push_document( + &obkv_to_object(doc, &documents_batch_index) + .map_err(|e| Error::from_milli(e, None))?, + )?; } dump_content_file.flush()?; } @@ -814,27 +818,41 @@ impl IndexScheduler { let metadata = IndexMetadata { uid: uid.to_owned(), primary_key: index.primary_key(&rtxn)?.map(String::from), - created_at: index.created_at(&rtxn)?, - updated_at: index.updated_at(&rtxn)?, + created_at: index + .created_at(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, + updated_at: index + .updated_at(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, }; let mut index_dumper = dump.create_index(uid, &metadata)?; let fields_ids_map = index.fields_ids_map(&rtxn)?; let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); - let embedding_configs = index.embedding_configs(&rtxn)?; + let embedding_configs = index + .embedding_configs(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + let documents = index + .all_documents(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; // 3.1. Dump the documents - for ret in index.all_documents(&rtxn)? 
{ + for ret in documents { if self.must_stop_processing.get() { return Err(Error::AbortedTask); } - let (id, doc) = ret?; + let (id, doc) = + ret.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; - let mut document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?; + let mut document = + milli::obkv_to_json(&all_fields, &fields_ids_map, doc) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; 'inject_vectors: { - let embeddings = index.embeddings(&rtxn, id)?; + let embeddings = index + .embeddings(&rtxn, id) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; if embeddings.is_empty() { break 'inject_vectors; @@ -845,7 +863,7 @@ impl IndexScheduler { .or_insert(serde_json::Value::Object(Default::default())); let serde_json::Value::Object(vectors) = vectors else { - return Err(milli::Error::UserError( + let user_err = milli::Error::UserError( milli::UserError::InvalidVectorsMapType { document_id: { if let Ok(Some(Ok(index))) = index @@ -859,8 +877,9 @@ impl IndexScheduler { }, value: vectors.clone(), }, - ) - .into()); + ); + + return Err(Error::from_milli(user_err, Some(uid.to_string()))); }; for (embedder_name, embeddings) in embeddings { @@ -890,7 +909,8 @@ impl IndexScheduler { index, &rtxn, meilisearch_types::settings::SecretPolicy::RevealSecrets, - )?; + ) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; index_dumper.settings(&settings)?; Ok(()) })?; @@ -946,7 +966,8 @@ impl IndexScheduler { // the entire batch. let res = || -> Result<()> { let index_rtxn = index.read_txn()?; - let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?; + let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) + .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; let mut wtxn = self.env.write_txn()?; self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; wtxn.commit()?; @@ -988,10 +1009,12 @@ impl IndexScheduler { ); builder.set_primary_key(primary_key); let must_stop_processing = self.must_stop_processing.clone(); - builder.execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - )?; + builder + .execute( + |indexing_step| tracing::debug!(update = ?indexing_step), + || must_stop_processing.get(), + ) + .map_err(|e| Error::from_milli(e, Some(index_uid.to_string())))?; index_wtxn.commit()?; } @@ -1008,7 +1031,8 @@ impl IndexScheduler { let res = || -> Result<()> { let mut wtxn = self.env.write_txn()?; let index_rtxn = index.read_txn()?; - let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?; + let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; self.index_mapper.store_stats_of(&mut wtxn, &index_uid, &stats)?; wtxn.commit()?; Ok(()) @@ -1031,7 +1055,9 @@ impl IndexScheduler { let number_of_documents = || -> Result { let index = self.index_mapper.index(&wtxn, &index_uid)?; let index_rtxn = index.read_txn()?; - Ok(index.number_of_documents(&index_rtxn)?) + index + .number_of_documents(&index_rtxn) + .map_err(|e| Error::from_milli(e, Some(index_uid.to_string()))) }() .unwrap_or_default(); @@ -1188,8 +1214,10 @@ impl IndexScheduler { }; match operation { - IndexOperation::DocumentClear { mut tasks, .. 
} => { - let count = milli::update::ClearDocuments::new(index_wtxn, index).execute()?; + IndexOperation::DocumentClear { index_uid, mut tasks } => { + let count = milli::update::ClearDocuments::new(index_wtxn, index) + .execute() + .map_err(|e| Error::from_milli(e, Some(index_uid)))?; let mut first_clear_found = false; for task in &mut tasks { @@ -1209,7 +1237,7 @@ impl IndexScheduler { Ok(tasks) } IndexOperation::DocumentOperation { - index_uid: _, + index_uid, primary_key, method, operations, @@ -1235,13 +1263,17 @@ impl IndexScheduler { let mut content_files_iter = content_files.iter(); let mut indexer = indexer::DocumentOperation::new(method); - let embedders = index.embedding_configs(index_wtxn)?; - let embedders = self.embedders(embedders)?; + let embedders = index + .embedding_configs(index_wtxn) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + let embedders = self.embedders(index_uid.clone(), embedders)?; for operation in operations { match operation { DocumentOperation::Add(_content_uuid) => { let mmap = content_files_iter.next().unwrap(); - indexer.add_documents(mmap)?; + indexer + .add_documents(mmap) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; } DocumentOperation::Delete(document_ids) => { let document_ids: bumpalo::collections::vec::Vec<_> = document_ids @@ -1266,15 +1298,17 @@ impl IndexScheduler { } }; - let (document_changes, operation_stats, primary_key) = indexer.into_changes( - &indexer_alloc, - index, - &rtxn, - primary_key.as_deref(), - &mut new_fields_ids_map, - &|| must_stop_processing.get(), - &send_progress, - )?; + let (document_changes, operation_stats, primary_key) = indexer + .into_changes( + &indexer_alloc, + index, + &rtxn, + primary_key.as_deref(), + &mut new_fields_ids_map, + &|| must_stop_processing.get(), + &send_progress, + ) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; let mut addition = 0; for (stats, task) in operation_stats.into_iter().zip(&mut tasks) { @@ -1321,14 +1355,15 @@ impl IndexScheduler { embedders, &|| must_stop_processing.get(), &send_progress, - )?; + ) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); } Ok(tasks) } - IndexOperation::DocumentEdition { mut task, .. } => { + IndexOperation::DocumentEdition { index_uid, mut task } => { let (filter, code) = if let KindWithContent::DocumentEdition { filter_expr, context: _, @@ -1342,16 +1377,11 @@ impl IndexScheduler { }; let candidates = match filter.as_ref().map(Filter::from_json) { - Some(Ok(Some(filter))) => { - filter.evaluate(index_wtxn, index).map_err(|err| match err { - milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { - Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter) - } - e => e.into(), - })? 
- } + Some(Ok(Some(filter))) => filter + .evaluate(index_wtxn, index) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, None | Some(Ok(None)) => index.documents_ids(index_wtxn)?, - Some(Err(e)) => return Err(e.into()), + Some(Err(e)) => return Err(Error::from_milli(e, Some(index_uid.clone()))), }; let (original_filter, context, function) = if let Some(Details::DocumentEdition { @@ -1386,8 +1416,9 @@ impl IndexScheduler { // candidates not empty => index not empty => a primary key is set let primary_key = index.primary_key(&rtxn)?.unwrap(); - let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) - .map_err(milli::Error::from)?; + let primary_key = + PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) + .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; let result_count = Ok((candidates.len(), candidates.len())) as Result<_>; @@ -1406,11 +1437,17 @@ impl IndexScheduler { }; let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone()); - let document_changes = - pool.install(|| indexer.into_changes(&primary_key)).unwrap()?; - - let embedders = index.embedding_configs(index_wtxn)?; - let embedders = self.embedders(embedders)?; + let document_changes = pool + .install(|| { + indexer + .into_changes(&primary_key) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))) + }) + .unwrap()?; + let embedders = index + .embedding_configs(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + let embedders = self.embedders(index_uid.clone(), embedders)?; indexer::index( index_wtxn, @@ -1424,7 +1461,8 @@ impl IndexScheduler { embedders, &|| must_stop_processing.get(), &send_progress, - )?; + ) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; // tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); } @@ -1455,7 +1493,7 @@ impl IndexScheduler { Ok(vec![task]) } - IndexOperation::DocumentDeletion { mut tasks, index_uid: _ } => { + IndexOperation::DocumentDeletion { mut tasks, index_uid } => { let mut to_delete = RoaringBitmap::new(); let external_documents_ids = index.external_documents_ids(); @@ -1476,35 +1514,23 @@ impl IndexScheduler { deleted_documents: Some(will_be_removed), }); } - KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => { + KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } => { let before = to_delete.len(); let filter = match Filter::from_json(filter_expr) { Ok(filter) => filter, Err(err) => { // theorically, this should be catched by deserr before reaching the index-scheduler and cannot happens task.status = Status::Failed; - task.error = match err { - milli::Error::UserError( - milli::UserError::InvalidFilterExpression { .. 
}, - ) => Some( - Error::from(err) - .with_custom_error_code(Code::InvalidDocumentFilter) - .into(), - ), - e => Some(e.into()), - }; + task.error = Some( + Error::from_milli(err, Some(index_uid.clone())).into(), + ); None } }; if let Some(filter) = filter { - let candidates = - filter.evaluate(index_wtxn, index).map_err(|err| match err { - milli::Error::UserError( - milli::UserError::InvalidFilter(_), - ) => Error::from(err) - .with_custom_error_code(Code::InvalidDocumentFilter), - e => e.into(), - }); + let candidates = filter + .evaluate(index_wtxn, index) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone()))); match candidates { Ok(candidates) => to_delete |= candidates, Err(err) => { @@ -1540,8 +1566,9 @@ impl IndexScheduler { // to_delete not empty => index not empty => primary key set let primary_key = index.primary_key(&rtxn)?.unwrap(); - let primary_key = PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) - .map_err(milli::Error::from)?; + let primary_key = + PrimaryKey::new_or_insert(primary_key, &mut new_fields_ids_map) + .map_err(|err| Error::from_milli(err.into(), Some(index_uid.clone())))?; if !tasks.iter().all(|res| res.error.is_some()) { let local_pool; @@ -1560,8 +1587,10 @@ impl IndexScheduler { let mut indexer = indexer::DocumentDeletion::new(); indexer.delete_documents_by_docids(to_delete); let document_changes = indexer.into_changes(&indexer_alloc, primary_key); - let embedders = index.embedding_configs(index_wtxn)?; - let embedders = self.embedders(embedders)?; + let embedders = index + .embedding_configs(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; + let embedders = self.embedders(index_uid.clone(), embedders)?; indexer::index( index_wtxn, @@ -1575,14 +1604,15 @@ impl IndexScheduler { embedders, &|| must_stop_processing.get(), &send_progress, - )?; + ) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; // tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); } Ok(tasks) } - IndexOperation::Settings { index_uid: _, settings, mut tasks } => { + IndexOperation::Settings { index_uid, settings, mut tasks } => { let indexer_config = self.index_mapper.indexer_config(); let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config); @@ -1596,10 +1626,12 @@ impl IndexScheduler { task.status = Status::Succeeded; } - builder.execute( - |indexing_step| tracing::debug!(update = ?indexing_step), - || must_stop_processing.get(), - )?; + builder + .execute( + |indexing_step| tracing::debug!(update = ?indexing_step), + || must_stop_processing.get(), + ) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; Ok(tasks) } diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index 82388172e..5fb04828c 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -1,13 +1,12 @@ use std::fmt::Display; +use crate::TaskId; use meilisearch_types::batches::BatchId; use meilisearch_types::error::{Code, ErrorCode}; use meilisearch_types::tasks::{Kind, Status}; use meilisearch_types::{heed, milli}; use thiserror::Error; -use crate::TaskId; - #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum DateField { BeforeEnqueuedAt, @@ -122,11 +121,11 @@ pub enum Error { Dump(#[from] dump::Error), #[error(transparent)] Heed(#[from] heed::Error), - #[error("{}", match .index_name { - Some(name) if !name.is_empty() => format!("Index `{}`: {error}", name), + #[error("{}", 
match .index_uid { + Some(uid) if !uid.is_empty() => format!("Index `{}`: {error}", uid), _ => format!("{error}") })] - Milli { error: milli::Error, index_name: Option }, + Milli { error: milli::Error, index_uid: Option }, #[error("An unexpected crash occurred when processing the task.")] ProcessBatchPanicked, #[error(transparent)] @@ -213,8 +212,18 @@ impl Error { Self::WithCustomErrorCode(code, Box::new(self)) } - pub fn from_milli(error: milli::Error, index_name: Option) -> Self { - Self::Milli { error, index_name } + pub fn from_milli(err: milli::Error, index_uid: Option) -> Self { + match err { + milli::Error::UserError(milli::UserError::InvalidFilter(_)) => { + Self::Milli { error: err, index_uid } + .with_custom_error_code(Code::InvalidDocumentFilter) + } + milli::Error::UserError(milli::UserError::InvalidFilterExpression { .. }) => { + Self::Milli { error: err, index_uid } + .with_custom_error_code(Code::InvalidDocumentFilter) + } + _ => Self::Milli { error: err, index_uid }, + } } } diff --git a/crates/index-scheduler/src/index_mapper/index_map.rs b/crates/index-scheduler/src/index_mapper/index_map.rs index c20782068..480dafa7c 100644 --- a/crates/index-scheduler/src/index_mapper/index_map.rs +++ b/crates/index-scheduler/src/index_mapper/index_map.rs @@ -8,9 +8,8 @@ use time::OffsetDateTime; use uuid::Uuid; use super::IndexStatus::{self, Available, BeingDeleted, Closing, Missing}; +use crate::clamp_to_page_size; use crate::lru::{InsertionOutcome, LruMap}; -use crate::{clamp_to_page_size}; - /// Keep an internally consistent view of the open indexes in memory. /// /// This view is made of an LRU cache that will evict the least frequently used indexes when new indexes are opened. diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index 500e4cf83..8b9ef3597 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -3,19 +3,19 @@ use std::sync::{Arc, RwLock}; use std::time::Duration; use std::{fs, thread}; +use self::index_map::IndexMap; +use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; +use crate::uuid_codec::UuidCodec; +use crate::{Error, Result}; use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; +use meilisearch_types::milli; use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::{FieldDistribution, Index}; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; use tracing::error; use uuid::Uuid; -use meilisearch_types::milli; -use self::index_map::IndexMap; -use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; -use crate::uuid_codec::UuidCodec; -use crate::{Error, Result}; mod index_map; @@ -183,13 +183,18 @@ impl IndexMapper { // Error if the UUIDv4 somehow already exists in the map, since it should be fresh. // This is very unlikely to happen in practice. // TODO: it would be better to lazily create the index. But we need an Index::open function for milli. 
- let index = self.index_map.write().unwrap().create( - &uuid, - &index_path, - date, - self.enable_mdb_writemap, - self.index_base_map_size, - ).map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; + let index = self + .index_map + .write() + .unwrap() + .create( + &uuid, + &index_path, + date, + self.enable_mdb_writemap, + self.index_base_map_size, + ) + .map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; wtxn.commit()?; @@ -357,7 +362,8 @@ impl IndexMapper { }; let index_path = self.base_path.join(uuid.to_string()); // take the lock to reopen the environment. - reopen.reopen(&mut self.index_map.write().unwrap(), &index_path) + reopen + .reopen(&mut self.index_map.write().unwrap(), &index_path) .map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; continue; } @@ -373,13 +379,15 @@ impl IndexMapper { Missing => { let index_path = self.base_path.join(uuid.to_string()); - break index_map.create( - &uuid, - &index_path, - None, - self.enable_mdb_writemap, - self.index_base_map_size, - ).map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; + break index_map + .create( + &uuid, + &index_path, + None, + self.enable_mdb_writemap, + self.index_base_map_size, + ) + .map_err(|e| Error::from_milli(e, Some(uuid.to_string())))?; } Available(index) => break index, Closing(_) => { @@ -460,7 +468,8 @@ impl IndexMapper { None => { let index = self.index(rtxn, index_uid)?; let index_rtxn = index.read_txn()?; - IndexStats::new(&index, &index_rtxn).map_err(|e| Error::from_milli(e, Some(uuid.to_string()))) + IndexStats::new(&index, &index_rtxn) + .map_err(|e| Error::from_milli(e, Some(uuid.to_string()))) } } } diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index 6147f788f..e780b21a1 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -1678,8 +1678,9 @@ impl IndexScheduler { tracing::info!("A batch of tasks was successfully completed with {success} successful tasks and {failure} failed tasks."); } // If we have an abortion error we must stop the tick here and re-schedule tasks. - Err(Error::Milli{ - error: milli::Error::InternalError(milli::InternalError::AbortedIndexation), .. + Err(Error::Milli { + error: milli::Error::InternalError(milli::InternalError::AbortedIndexation), + .. }) | Err(Error::AbortedTask) => { #[cfg(test)] @@ -1700,7 +1701,8 @@ impl IndexScheduler { // 3. resize it // 4. re-schedule tasks Err(Error::Milli { - error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached), .. + error: milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached), + .. }) if index_uid.is_some() => { // fixme: add index_uid to match to avoid the unwrap let index_uid = index_uid.unwrap(); @@ -1954,11 +1956,12 @@ impl IndexScheduler { config: milli::vector::EmbeddingConfig { embedder_options, prompt, quantized }, .. }| { - let prompt = - Arc::new(prompt.try_into() + let prompt = Arc::new( + prompt + .try_into() .map_err(meilisearch_types::milli::Error::from) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))? 
- ); + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + ); // optimistically return existing embedder { let embedders = self.embedders.read().unwrap(); @@ -1974,8 +1977,9 @@ impl IndexScheduler { let embedder = Arc::new( Embedder::new(embedder_options.clone()) .map_err(meilisearch_types::milli::vector::Error::from) - .map_err(meilisearch_types::milli::Error::from) - .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?, + .map_err(|err| { + Error::from_milli(err.into(), Some(index_uid.clone())) + })?, ); { let mut embedders = self.embedders.write().unwrap(); @@ -6176,7 +6180,7 @@ mod tests { insta::assert_json_snapshot!(simple_hf_config.embedder_options); let simple_hf_name = name.clone(); - let configs = index_scheduler.embedders(configs).unwrap(); + let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap(); diff --git a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap index 492eae3dd..0ee4d91e5 100644 --- a/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap +++ b/crates/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -9,8 +9,8 @@ source: crates/index-scheduler/src/lib.rs 0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} -3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Invalid type for 
filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} -4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} +3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} +4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} 5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/meilisearch/src/error.rs b/crates/meilisearch/src/error.rs index 6e7283a18..41d62507a 100644 --- a/crates/meilisearch/src/error.rs +++ b/crates/meilisearch/src/error.rs @@ -4,10 +4,10 @@ use byte_unit::{Byte, UnitType}; use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; use meilisearch_types::error::{Code, ErrorCode, ResponseError}; use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; +use meilisearch_types::milli; use meilisearch_types::milli::OrderBy; use serde_json::Value; use tokio::task::JoinError; -use meilisearch_types::milli; #[derive(Debug, thiserror::Error)] pub enum MeilisearchHttpError { @@ -67,7 +67,7 @@ pub enum MeilisearchHttpError { Some(name) if !name.is_empty() => format!("Index `{}`: {error}", name), _ => format!("{error}") })] - Milli { error: meilisearch_types::milli::Error, index_name: Option }, + Milli { error: milli::Error, index_name: Option }, #[error(transparent)] Payload(#[from] PayloadError), #[error(transparent)] @@ -105,7 +105,7 @@ impl ErrorCode for MeilisearchHttpError { MeilisearchHttpError::SerdeJson(_) => Code::Internal, MeilisearchHttpError::HeedError(_) => Code::Internal, MeilisearchHttpError::IndexScheduler(e) => e.error_code(), - MeilisearchHttpError::Milli{error, ..} => error.error_code(), + MeilisearchHttpError::Milli { error, .. 
} => error.error_code(), MeilisearchHttpError::Payload(e) => e.error_code(), MeilisearchHttpError::FileStore(_) => Code::Internal, MeilisearchHttpError::DocumentFormat(e) => e.error_code(), diff --git a/crates/meilisearch/src/routes/indexes/facet_search.rs b/crates/meilisearch/src/routes/indexes/facet_search.rs index fc29d3406..ff11f1305 100644 --- a/crates/meilisearch/src/routes/indexes/facet_search.rs +++ b/crates/meilisearch/src/routes/indexes/facet_search.rs @@ -185,7 +185,8 @@ pub async fn search( let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); - let search_kind = search_kind(&search_query, &index_scheduler, index_uid.to_string(), &index, features)?; + let search_kind = + search_kind(&search_query, &index_scheduler, index_uid.to_string(), &index, features)?; let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { perform_facet_search( diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs index 1dda27a98..b2a85335b 100644 --- a/crates/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -107,7 +107,10 @@ pub async fn list_indexes( if !filters.is_index_authorized(uid) { return Ok(None); } - Ok(Some(IndexView::new(uid.to_string(), index).map_err(|e| Error::from_milli(e, Some(uid.to_string())))?)) + Ok(Some( + IndexView::new(uid.to_string(), index) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, + )) })?; // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened. let indexes: Vec = indexes.into_iter().flatten().collect(); diff --git a/crates/meilisearch/src/routes/indexes/search.rs b/crates/meilisearch/src/routes/indexes/search.rs index 609439b4a..fbaac67da 100644 --- a/crates/meilisearch/src/routes/indexes/search.rs +++ b/crates/meilisearch/src/routes/indexes/search.rs @@ -243,11 +243,19 @@ pub async fn search_with_url_query( let index = index_scheduler.index(&index_uid)?; let features = index_scheduler.features(); - let search_kind = search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?; + let search_kind = + search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?; let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(index_uid.to_string(), &index, query, search_kind, retrieve_vector, index_scheduler.features()) + perform_search( + index_uid.to_string(), + &index, + query, + search_kind, + retrieve_vector, + index_scheduler.features(), + ) }) .await; permit.drop().await; @@ -287,12 +295,20 @@ pub async fn search_with_post( let features = index_scheduler.features(); - let search_kind = search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?; + let search_kind = + search_kind(&query, index_scheduler.get_ref(), index_uid.to_string(), &index, features)?; let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; let permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(index_uid.to_string(), &index, query, search_kind, retrieve_vectors, index_scheduler.features()) + perform_search( + index_uid.to_string(), + &index, + query, + search_kind, + retrieve_vectors, + index_scheduler.features(), + ) }) .await; 
permit.drop().await; diff --git a/crates/meilisearch/src/routes/indexes/similar.rs b/crates/meilisearch/src/routes/indexes/similar.rs index a0fccff52..f47771061 100644 --- a/crates/meilisearch/src/routes/indexes/similar.rs +++ b/crates/meilisearch/src/routes/indexes/similar.rs @@ -103,8 +103,13 @@ async fn similar( let index = index_scheduler.index(&index_uid)?; - let (embedder_name, embedder, quantized) = - SearchKind::embedder(&index_scheduler, index_uid.to_string(), &index, &query.embedder, None)?; + let (embedder_name, embedder, quantized) = SearchKind::embedder( + &index_scheduler, + index_uid.to_string(), + &index, + &query.embedder, + None, + )?; tokio::task::spawn_blocking(move || { perform_similar( diff --git a/crates/meilisearch/src/routes/multi_search.rs b/crates/meilisearch/src/routes/multi_search.rs index c4496e41c..a2db0b22b 100644 --- a/crates/meilisearch/src/routes/multi_search.rs +++ b/crates/meilisearch/src/routes/multi_search.rs @@ -127,14 +127,26 @@ pub async fn multi_search_with_post( let index_uid_str = index_uid.to_string(); - let search_kind = - search_kind(&query, index_scheduler.get_ref(), index_uid_str.clone(), &index, features) - .with_index(query_index)?; + let search_kind = search_kind( + &query, + index_scheduler.get_ref(), + index_uid_str.clone(), + &index, + features, + ) + .with_index(query_index)?; let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features) .with_index(query_index)?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(index_uid_str.clone(), &index, query, search_kind, retrieve_vector, features) + perform_search( + index_uid_str.clone(), + &index, + query, + search_kind, + retrieve_vector, + features, + ) }) .await .with_index(query_index)?; diff --git a/crates/meilisearch/src/search/federated.rs b/crates/meilisearch/src/search/federated.rs index 5aae82c66..c1c6bb7d7 100644 --- a/crates/meilisearch/src/search/federated.rs +++ b/crates/meilisearch/src/search/federated.rs @@ -560,7 +560,8 @@ pub fn perform_federated_search( // use an immediately invoked lambda to capture the result without returning from the function let res: Result<(), ResponseError> = (|| { - let search_kind = search_kind(&query, index_scheduler, index_uid.to_string(), &index, features)?; + let search_kind = + search_kind(&query, index_scheduler, index_uid.to_string(), &index, features)?; let canonicalization_kind = match (&search_kind, &query.q) { (SearchKind::SemanticOnly { .. 
}, _) => { @@ -636,7 +637,8 @@ pub fn perform_federated_search( search.offset(0); search.limit(required_hit_count); - let (result, _semantic_hit_count) = super::search_from_kind(index_uid.to_string(), search_kind, search)?; + let (result, _semantic_hit_count) = + super::search_from_kind(index_uid.to_string(), search_kind, search)?; let format = AttributesFormat { attributes_to_retrieve: query.attributes_to_retrieve, retrieve_vectors, @@ -670,8 +672,10 @@ pub fn perform_federated_search( let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); - let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder) - .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?; + let hit_maker = + HitMaker::new(&index, &rtxn, format, formatter_builder).map_err(|e| { + MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())) + })?; results_by_query.push(SearchResultByQuery { federation_options, diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 7beaad6a5..674ae226b 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -19,7 +19,9 @@ use meilisearch_types::locales::Locale; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; use meilisearch_types::milli::vector::Embedder; -use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget}; +use meilisearch_types::milli::{ + FacetValueHit, InternalError, OrderBy, SearchForFacetValues, TimeBudget, +}; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; use milli::tokenizer::{Language, TokenizerBuilder}; @@ -281,35 +283,38 @@ pub enum SearchKind { impl SearchKind { pub(crate) fn semantic( index_scheduler: &index_scheduler::IndexScheduler, + index_uid: String, index: &Index, embedder_name: &str, vector_len: Option, ) -> Result { let (embedder_name, embedder, quantized) = - Self::embedder(index_scheduler, index, embedder_name, vector_len)?; + Self::embedder(index_scheduler, index_uid, index, embedder_name, vector_len)?; Ok(Self::SemanticOnly { embedder_name, embedder, quantized }) } pub(crate) fn hybrid( index_scheduler: &index_scheduler::IndexScheduler, + index_uid: String, index: &Index, embedder_name: &str, semantic_ratio: f32, vector_len: Option, ) -> Result { let (embedder_name, embedder, quantized) = - Self::embedder(index_scheduler, index, embedder_name, vector_len)?; + Self::embedder(index_scheduler, index_uid, index, embedder_name, vector_len)?; Ok(Self::Hybrid { embedder_name, embedder, quantized, semantic_ratio }) } pub(crate) fn embedder( index_scheduler: &index_scheduler::IndexScheduler, + index_uid: String, index: &Index, embedder_name: &str, vector_len: Option, ) -> Result<(String, Arc, bool), ResponseError> { let embedder_configs = index.embedding_configs(&index.read_txn()?)?; - let embedders = index_scheduler.embedders(embedder_configs)?; + let embedders = index_scheduler.embedders(index_uid, embedder_configs)?; let (embedder, _, quantized) = embedders .get(embedder_name) @@ -890,6 +895,7 @@ fn prepare_search<'t>( } pub fn perform_search( + index_uid: String, index: &Index, query: SearchQuery, search_kind: SearchKind, @@ -916,7 +922,7 @@ pub fn perform_search( used_negative_operator, }, semantic_hit_count, - ) = search_from_kind(search_kind, search)?; + ) = search_from_kind(index_uid, search_kind, search)?; let SearchQuery 
{ q, @@ -1069,17 +1075,27 @@ fn compute_facet_distribution_stats>( } pub fn search_from_kind( + index_uid: String, search_kind: SearchKind, search: milli::Search<'_>, ) -> Result<(milli::SearchResult, Option), MeilisearchHttpError> { let (milli_result, semantic_hit_count) = match &search_kind { - SearchKind::KeywordOnly => (search.execute()?, None), + SearchKind::KeywordOnly => { + let results = search + .execute() + .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?; + (results, None) + } SearchKind::SemanticOnly { .. } => { - let results = search.execute()?; + let results = search + .execute() + .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid.to_string())))?; let semantic_hit_count = results.document_scores.len() as u32; (results, Some(semantic_hit_count)) } - SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?, + SearchKind::Hybrid { semantic_ratio, .. } => search + .execute_hybrid(*semantic_ratio) + .map_err(|e| MeilisearchHttpError::from_milli(e, Some(index_uid)))?, }; Ok((milli_result, semantic_hit_count)) } @@ -1181,7 +1197,7 @@ impl<'a> HitMaker<'a> { rtxn: &'a RoTxn<'a>, format: AttributesFormat, mut formatter_builder: MatcherBuilder<'a>, - ) -> Result { + ) -> milli::Result { formatter_builder.crop_marker(format.crop_marker); formatter_builder.highlight_prefix(format.highlight_pre_tag); formatter_builder.highlight_suffix(format.highlight_post_tag); @@ -1276,11 +1292,7 @@ impl<'a> HitMaker<'a> { }) } - pub fn make_hit( - &self, - id: u32, - score: &[ScoreDetails], - ) -> Result { + pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result { let (_, obkv) = self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?; @@ -1323,7 +1335,10 @@ impl<'a> HitMaker<'a> { .is_some_and(|conf| conf.user_provided.contains(id)); let embeddings = ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided }; - vectors.insert(name, serde_json::to_value(embeddings)?); + vectors.insert( + name, + serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?, + ); } document.insert("_vectors".into(), vectors.into()); } @@ -1369,7 +1384,7 @@ fn make_hits<'a>( format: AttributesFormat, matching_words: milli::MatchingWords, documents_ids_scores: impl Iterator)> + 'a, -) -> Result, MeilisearchHttpError> { +) -> milli::Result> { let mut documents = Vec::new(); let dictionary = index.dictionary(rtxn)?; @@ -1697,12 +1712,12 @@ fn make_document( displayed_attributes: &BTreeSet, field_ids_map: &FieldsIdsMap, obkv: &obkv::KvReaderU16, -) -> Result { +) -> milli::Result { let mut document = serde_json::Map::new(); // recreate the original json for (key, value) in obkv.iter() { - let value = serde_json::from_slice(value)?; + let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?; let key = field_ids_map.name(key).expect("Missing field name").to_string(); document.insert(key, value); @@ -1727,7 +1742,7 @@ fn format_fields( displayable_ids: &BTreeSet, locales: Option<&[Language]>, localized_attributes: &[LocalizedAttributesRule], -) -> Result<(Option, Document), MeilisearchHttpError> { +) -> milli::Result<(Option, Document)> { let mut matches_position = compute_matches.then(BTreeMap::new); let mut document = document.clone(); @@ -1905,7 +1920,7 @@ fn parse_filter_array(arr: &[Value]) -> Result, MeilisearchHttpEr } } - Ok(Filter::from_array(ands)?) 
+ Filter::from_array(ands).map_err(|e| MeilisearchHttpError::from_milli(e, None)) } #[cfg(test)] diff --git a/crates/meilisearch/tests/documents/add_documents.rs b/crates/meilisearch/tests/documents/add_documents.rs index eebc5dc63..750bf7ae9 100644 --- a/crates/meilisearch/tests/documents/add_documents.rs +++ b/crates/meilisearch/tests/documents/add_documents.rs @@ -1681,7 +1681,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.", + "message": "Index `test`: The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1719,7 +1719,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.", + "message": "Index `test`: Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1757,7 +1757,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.", + "message": "Index `test`: Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1795,7 +1795,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.", + "message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1833,7 +1833,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.", + "message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1871,7 +1871,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.", + "message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. 
Was expecting a `_geo.lng` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1909,7 +1909,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.", + "message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1947,7 +1947,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.", + "message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -1985,7 +1985,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.", + "message": "Index `test`: Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2023,7 +2023,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.", + "message": "Index `test`: Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2061,7 +2061,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.", + "message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2099,7 +2099,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.", + "message": "Index `test`: The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2138,7 +2138,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse longitude in the document with the id: `\"12\"`. 
Was expecting a finite number but instead got `null`.", + "message": "Index `test`: Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2175,7 +2175,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.", + "message": "Index `test`: Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2212,7 +2212,7 @@ async fn add_documents_invalid_geo_field() { "indexedDocuments": 0 }, "error": { - "message": "Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.", + "message": "Index `test`: Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" @@ -2279,7 +2279,7 @@ async fn add_invalid_geo_and_then_settings() { ] }, "error": { - "message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.", + "message": "Index `test`: Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.", "code": "invalid_document_geo_field", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field" diff --git a/crates/meilisearch/tests/documents/errors.rs b/crates/meilisearch/tests/documents/errors.rs index c90b9ed49..1e361fefb 100644 --- a/crates/meilisearch/tests/documents/errors.rs +++ b/crates/meilisearch/tests/documents/errors.rs @@ -604,7 +604,7 @@ async fn delete_document_by_filter() { "originalFilter": "\"doggo = bernese\"" }, "error": { - "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese", + "message": "Index `EMPTY_INDEX`: Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -636,7 +636,7 @@ async fn delete_document_by_filter() { "originalFilter": "\"catto = jorts\"" }, "error": { - "message": "Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts", + "message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. 
Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" diff --git a/crates/meilisearch/tests/index/update_index.rs b/crates/meilisearch/tests/index/update_index.rs index 36ec27306..f991c3580 100644 --- a/crates/meilisearch/tests/index/update_index.rs +++ b/crates/meilisearch/tests/index/update_index.rs @@ -95,7 +95,7 @@ async fn error_update_existing_primary_key() { let response = index.wait_task(2).await; let expected_response = json!({ - "message": "Index already has a primary key: `id`.", + "message": "Index `test`: Index already has a primary key: `id`.", "code": "index_primary_key_already_exists", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#index_primary_key_already_exists" diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index 6840f8fba..ab50e2aa1 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -711,7 +711,7 @@ async fn filter_invalid_attribute_array() { index.wait_task(task.uid()).await; let expected_response = json!({ - "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", + "message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid), "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -733,7 +733,7 @@ async fn filter_invalid_attribute_string() { index.wait_task(task.uid()).await; let expected_response = json!({ - "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", + "message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid), "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -940,7 +940,7 @@ async fn sort_unsortable_attribute() { index.wait_task(response.uid()).await.succeeded(); let expected_response = json!({ - "message": "Attribute `title` is not sortable. Available sortable attributes are: `id`.", + "message": format!("Index `{}`: Attribute `title` is not sortable. Available sortable attributes are: `id`.", index.uid), "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" @@ -998,7 +998,7 @@ async fn sort_unset_ranking_rule() { index.wait_task(response.uid()).await.succeeded(); let expected_response = json!({ - "message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.", + "message": format!("Index `{}`: You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.", index.uid), "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" @@ -1024,19 +1024,18 @@ async fn search_on_unknown_field() { index.update_settings_searchable_attributes(json!(["id", "title"])).await; index.wait_task(response.uid()).await.succeeded(); + let expected_response = json!({ + "message": format!("Index `{}`: Attribute `unknown` is not searchable. 
Available searchable attributes are: `id, title`.", index.uid), + "code": "invalid_search_attributes_to_search_on", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" + }); index .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}), |response, code| { - snapshot!(code, @"400 Bad Request"); - snapshot!(json_string!(response), @r###" - { - "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", - "code": "invalid_search_attributes_to_search_on", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" - } - "###); + assert_eq!(response, expected_response); + assert_eq!(code, 400); }, ) .await; @@ -1050,19 +1049,18 @@ async fn search_on_unknown_field_plus_joker() { index.update_settings_searchable_attributes(json!(["id", "title"])).await; index.wait_task(response.uid()).await.succeeded(); + let expected_response = json!({ + "message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid), + "code": "invalid_search_attributes_to_search_on", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" + }); index .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}), |response, code| { - snapshot!(code, @"400 Bad Request"); - snapshot!(json_string!(response), @r###" - { - "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", - "code": "invalid_search_attributes_to_search_on", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" - } - "###); + assert_eq!(response, expected_response); + assert_eq!(code, 400); }, ) .await; @@ -1071,15 +1069,8 @@ async fn search_on_unknown_field_plus_joker() { .search( json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}), |response, code| { - snapshot!(code, @"400 Bad Request"); - snapshot!(json_string!(response), @r###" - { - "message": "Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", - "code": "invalid_search_attributes_to_search_on", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" - } - "###); + assert_eq!(response, expected_response); + assert_eq!(code, 400); }, ) .await; @@ -1092,47 +1083,44 @@ async fn distinct_at_search_time() { let (task, _) = index.create(None).await; index.wait_task(task.uid()).await.succeeded(); + let expected_response = json!({ + "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", index.uid), + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + }); let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; - snapshot!(code, @"400 Bad Request"); - snapshot!(response, @r###" - { - "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. 
This index does not have configured filterable attributes.", - "code": "invalid_search_distinct", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" - } - "###); + assert_eq!(response, expected_response); + assert_eq!(code, 400); let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await; index.wait_task(task.uid()).await; + let expected_response = json!({ + "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", index.uid), + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + }); let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; - snapshot!(code, @"400 Bad Request"); - snapshot!(response, @r###" - { - "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", - "code": "invalid_search_distinct", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" - } - "###); + assert_eq!(response, expected_response); + assert_eq!(code, 400); let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await; index.wait_task(task.uid()).await; + let expected_response = json!({ + "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", index.uid), + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + }); let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; - snapshot!(code, @"400 Bad Request"); - snapshot!(response, @r###" - { - "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", - "code": "invalid_search_distinct", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" - } - "###); + assert_eq!(response, expected_response); + assert_eq!(code, 400); let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await; diff --git a/crates/meilisearch/tests/search/multi.rs b/crates/meilisearch/tests/search/multi.rs index 8d7340f0d..9377f435a 100644 --- a/crates/meilisearch/tests/search/multi.rs +++ b/crates/meilisearch/tests/search/multi.rs @@ -1070,7 +1070,7 @@ async fn federation_one_query_error() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto", + "message": "Inside `.queries[1]`: Index `nested`: Attribute `title` is not filterable. 
This index does not have configured filterable attributes.\n1:6 title = toto", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1102,7 +1102,7 @@ async fn federation_one_query_sort_error() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.", + "message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" @@ -1166,7 +1166,7 @@ async fn federation_multiple_query_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[0]`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto", + "message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not filterable. This index does not have configured filterable attributes.\n1:6 title = toto", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1198,7 +1198,7 @@ async fn federation_multiple_query_sort_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[0]`: Attribute `title` is not sortable. This index does not have configured sortable attributes.", + "message": "Inside `.queries[0]`: Index `test`: Attribute `title` is not sortable. This index does not have configured sortable attributes.", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" @@ -1231,7 +1231,7 @@ async fn federation_multiple_query_errors_interleaved() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Attribute `doggos` is not filterable. This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]", + "message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not filterable. This index does not have configured filterable attributes.\n1:7 doggos IN [intel, kefir]", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1264,7 +1264,7 @@ async fn federation_multiple_query_sort_errors_interleaved() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Inside `.queries[1]`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.", + "message": "Inside `.queries[1]`: Index `nested`: Attribute `doggos` is not sortable. This index does not have configured sortable attributes.", "code": "invalid_search_sort", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_sort" diff --git a/crates/meilisearch/tests/tasks/mod.rs b/crates/meilisearch/tests/tasks/mod.rs index fc05ee4ca..c9d3f31ed 100644 --- a/crates/meilisearch/tests/tasks/mod.rs +++ b/crates/meilisearch/tests/tasks/mod.rs @@ -448,7 +448,7 @@ async fn test_summarized_delete_documents_by_filter() { "originalFilter": "\"doggo = bernese\"" }, "error": { - "message": "Attribute `doggo` is not filterable. 
This index does not have configured filterable attributes.\n1:6 doggo = bernese", + "message": "Index `test`: Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" diff --git a/crates/meilisearch/tests/vector/binary_quantized.rs b/crates/meilisearch/tests/vector/binary_quantized.rs index 560c4e2f2..790df5459 100644 --- a/crates/meilisearch/tests/vector/binary_quantized.rs +++ b/crates/meilisearch/tests/vector/binary_quantized.rs @@ -318,7 +318,7 @@ async fn try_to_disable_binary_quantization() { } }, "error": { - "message": "`.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.", + "message": "Index `doggo`: `.embedders.manual.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors.", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" diff --git a/crates/meilisearch/tests/vector/mod.rs b/crates/meilisearch/tests/vector/mod.rs index bb20d7b2a..adad9fa81 100644 --- a/crates/meilisearch/tests/vector/mod.rs +++ b/crates/meilisearch/tests/vector/mod.rs @@ -250,7 +250,7 @@ async fn user_provided_embeddings_error() { "indexedDocuments": 0 }, "error": { - "message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`", + "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`", "code": "invalid_vectors_type", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" @@ -280,7 +280,7 @@ async fn user_provided_embeddings_error() { "indexedDocuments": 0 }, "error": { - "message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`", + "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`", "code": "invalid_vectors_type", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" @@ -311,7 +311,7 @@ async fn user_provided_embeddings_error() { "indexedDocuments": 0 }, "error": { - "message": "Bad embedder configuration in the document with id: `0`. Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26", + "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. 
Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26", "code": "invalid_vectors_type", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" @@ -340,7 +340,7 @@ async fn user_provided_embeddings_error() { "indexedDocuments": 0 }, "error": { - "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`", + "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`", "code": "invalid_vectors_type", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" @@ -369,7 +369,7 @@ async fn user_provided_embeddings_error() { "indexedDocuments": 0 }, "error": { - "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`", + "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`", "code": "invalid_vectors_type", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" @@ -398,7 +398,7 @@ async fn user_provided_embeddings_error() { "indexedDocuments": 0 }, "error": { - "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`", + "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`", "code": "invalid_vectors_type", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" @@ -440,7 +440,7 @@ async fn user_provided_embeddings_error() { "indexedDocuments": 0 }, "error": { - "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`", + "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`", "code": "invalid_vectors_type", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" @@ -469,7 +469,7 @@ async fn user_provided_embeddings_error() { "indexedDocuments": 0 }, "error": { - "message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`", + "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`", "code": "invalid_vectors_type", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" @@ -498,7 +498,7 @@ async fn user_provided_embeddings_error() { "indexedDocuments": 0 }, "error": { - "message": "Bad embedder configuration in the document with id: `0`. 
Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`", + "message": "Index `doggo`: Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`", "code": "invalid_vectors_type", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_vectors_type" @@ -539,7 +539,7 @@ async fn user_provided_vectors_error() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`", + "message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -569,7 +569,7 @@ async fn user_provided_vectors_error() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).", + "message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -599,7 +599,7 @@ async fn user_provided_vectors_error() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).", + "message": "Index `doggo`: While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" diff --git a/crates/meilisearch/tests/vector/openai.rs b/crates/meilisearch/tests/vector/openai.rs index 99aa1f710..b02111639 100644 --- a/crates/meilisearch/tests/vector/openai.rs +++ b/crates/meilisearch/tests/vector/openai.rs @@ -713,7 +713,7 @@ async fn bad_api_key() { } }, "error": { - "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. 
You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables", + "message": "Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -757,7 +757,7 @@ async fn bad_api_key() { } }, "error": { - "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables", + "message": "Index `doggo`: While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. 
You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" diff --git a/crates/meilisearch/tests/vector/rest.rs b/crates/meilisearch/tests/vector/rest.rs index cadc54f24..bf6876fbe 100644 --- a/crates/meilisearch/tests/vector/rest.rs +++ b/crates/meilisearch/tests/vector/rest.rs @@ -985,7 +985,7 @@ async fn bad_settings() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting a single \"{{embedding}}\", expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence", + "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting a single \"{{embedding}}\", expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1025,7 +1025,7 @@ async fn bad_settings() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`", + "message": "Index `doggo`: While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1178,7 +1178,7 @@ async fn server_returns_bad_request() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"test\\\", expected struct MultipleRequest at line 1 column 6\"}`", + "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"test\\\", expected struct MultipleRequest at line 1 column 6\"}`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1247,7 +1247,7 @@ async fn server_returns_bad_request() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - 
server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`", + "message": "Index `doggo`: While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1306,7 +1306,7 @@ async fn server_returns_bad_response() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting the array of \"{{embedding}}\"s, configuration expects `response` to be an array with at least 1 item(s) but server sent an object with 1 field(s)", + "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting the array of \"{{embedding}}\"s, configuration expects `response` to be an array with at least 1 item(s) but server sent an object with 1 field(s)", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1362,7 +1362,7 @@ async fn server_returns_bad_response() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting item #0 from the array of \"{{embedding}}\"s, expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence", + "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting item #0 from the array of \"{{embedding}}\"s, expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1414,7 +1414,7 @@ async fn server_returns_bad_response() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output`, while extracting a single \"{{embedding}}\", expected `output` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected f32", + "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output`, while extracting a single \"{{embedding}}\", expected `output` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected 
f32", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1478,7 +1478,7 @@ async fn server_returns_bad_response() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.embedding`, while extracting item #0 from the array of \"{{embedding}}\"s, configuration expects `embedding` to be an object with key `data` but server sent an array of size 3", + "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.embedding`, while extracting item #0 from the array of \"{{embedding}}\"s, configuration expects `embedding` to be an object with key `data` but server sent an array of size 3", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1542,7 +1542,7 @@ async fn server_returns_bad_response() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output[0]`, while extracting a single \"{{embedding}}\", configuration expects key \"embeddings\", which is missing in response\n - Hint: item #0 inside `output` has key `embedding`, did you mean `response.output[0].embedding` in embedder configuration?", + "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output[0]`, while extracting a single \"{{embedding}}\", configuration expects key \"embeddings\", which is missing in response\n - Hint: item #0 inside `output` has key `embedding`, did you mean `response.output[0].embedding` in embedder configuration?", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1908,7 +1908,7 @@ async fn server_custom_header() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration", + "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1951,7 +1951,7 @@ async fn server_custom_header() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check 
the `apiKey` parameter in the embedder configuration", + "message": "Index `doggo`: Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -2099,7 +2099,7 @@ async fn searchable_reindex() { ] }, "error": { - "message": "While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`", + "message": "Index `doggo`: While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" From 08f2c696b0c663c8a668586448e3986d47c41f04 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 9 Dec 2024 09:35:51 +0100 Subject: [PATCH 105/158] Allow xtask bench to proceed without a commit message --- crates/xtask/src/bench/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/xtask/src/bench/mod.rs b/crates/xtask/src/bench/mod.rs index 891742528..deec120fa 100644 --- a/crates/xtask/src/bench/mod.rs +++ b/crates/xtask/src/bench/mod.rs @@ -139,7 +139,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { rt.block_on(async { dashboard_client.send_machine_info(&env).await?; - let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap(); + let commit_message = build_info.commit_msg.unwrap_or_default().split('\n').next().unwrap(); let max_workloads = args.workload_file.len(); let reason: Option<&str> = args.reason.as_deref(); let invocation_uuid = dashboard_client.create_invocation(build_info.clone(), commit_message, env, max_workloads, reason).await?; From bcfed7088863746e096c0d17f5c6b19b6d57ffb8 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 9 Dec 2024 10:08:02 +0100 Subject: [PATCH 106/158] Revert "Merge #5125" This reverts commit 9a9383643f9a6b5ee9ab2ace3e9d63b920d94a53, reversing changes made to cac355bfa7e72ca3c5c02cacb4f2fcd3f2dd336e. --- crates/meilisearch/src/option.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/meilisearch/src/option.rs b/crates/meilisearch/src/option.rs index 7c59f0607..7e87a5a2c 100644 --- a/crates/meilisearch/src/option.rs +++ b/crates/meilisearch/src/option.rs @@ -654,9 +654,8 @@ impl Opt { #[derive(Debug, Default, Clone, Parser, Deserialize)] pub struct IndexerOpts { - /// Specifies the maximum resident memory that Meilisearch can use for indexing. - /// By default, Meilisearch limits the RAM usage to 5% of the total available memory. - /// Note that the underlying store utilizes memory-mapping and makes use of the rest. + /// Sets the maximum amount of RAM Meilisearch can use when indexing. By default, Meilisearch + /// uses no more than two thirds of available memory. 
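// As a rough illustration of what these defaults amount to, assuming for example a
// machine with 12 GiB of RAM (the figures are purely illustrative):
//
//     let total = 12u64 * 1024 * 1024 * 1024;        // hypothetical total memory
//     let cli_budget = total * 2 / 3;                 // MaxMemory::default(): ~8 GiB
//     let new_indexer_budget = cli_budget * 5 / 100;  // ~0.4 GiB, see the milli patch below
//
// The remainder is left to the memory-mapped store rather than being reserved by the
// indexer itself.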
#[clap(long, env = MEILI_MAX_INDEXING_MEMORY, default_value_t)] #[serde(default)] pub max_indexing_memory: MaxMemory, @@ -715,7 +714,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig { } } -/// A type used to detect the max resident memory available and use 5% of it. +/// A type used to detect the max memory available and use 2/3 of it. #[derive(Debug, Clone, Copy, Deserialize, Serialize)] pub struct MaxMemory(Option); @@ -729,7 +728,7 @@ impl FromStr for MaxMemory { impl Default for MaxMemory { fn default() -> MaxMemory { - MaxMemory(total_memory_bytes().map(|bytes| bytes * 5 / 100).map(Byte::from_u64)) + MaxMemory(total_memory_bytes().map(|bytes| bytes * 2 / 3).map(Byte::from_u64)) } } From f5dd8dfc3e57cfa36fc3ccbefe73de0706a156fd Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Mon, 9 Dec 2024 10:26:30 +0100 Subject: [PATCH 107/158] Rollback max memory usage changes --- crates/milli/src/update/new/indexer/mod.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 9ee7577a5..59088bd47 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -80,6 +80,15 @@ where let mut bbbuffers = Vec::new(); let finished_extraction = AtomicBool::new(false); + // We reduce the actual memory used to 5%. The reason we do this here and not in Meilisearch + // is because we still use the old indexer for the settings and it is highly impacted by the + // max memory. So we keep the changes here and will remove these changes once we use the new + // indexer to also index settings. Related to #5125 and #5141. + let grenad_parameters = GrenadParameters { + max_memory: grenad_parameters.max_memory.map(|mm| mm * 5 / 100), + ..grenad_parameters + }; + // We compute and remove the allocated BBQueues buffers capacity from the indexing memory. let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or( From 71f59749dca59bec6119da76cef5d984864b43fb Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 9 Dec 2024 15:44:06 +0100 Subject: [PATCH 108/158] Reduce union impact in merging --- crates/milli/src/update/new/merger.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/milli/src/update/new/merger.rs b/crates/milli/src/update/new/merger.rs index 9728f99d6..9e87388a2 100644 --- a/crates/milli/src/update/new/merger.rs +++ b/crates/milli/src/update/new/merger.rs @@ -235,8 +235,12 @@ fn merge_cbo_bitmaps( (Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange (Some(current), None, Some(add)) => Ok(Operation::Write(current | add)), (Some(current), Some(del), add) => { + debug_assert!( + del.is_subset(&current), + "del is not a subset of current, which must be impossible."
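// A small worked example of why the rewrite just below is equivalent to the previous
// `(current - del) | add` form; it relies on the `del.is_subset(&current)` invariant
// asserted above. Assuming for instance:
//
//     current = {1, 2, 3}, del = {2, 3}, add = {3, 4}
//
//     (current - del) | add                 == {1}    | {3, 4} == {1, 3, 4}
//     (current - (del - add)) | (add - del) == {1, 3} | {4}    == {1, 3, 4}
//
// Both forms yield the same bitmap, but the second one only subtracts entries that are
// really deleted and only unions entries that are really new, so the union operates on
// smaller bitmaps.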
+ ); let output = match add { - Some(add) => (&current - del) | add, + Some(add) => (&current - (&del - &add)) | (add - del), None => &current - del, }; if output.is_empty() { From 07f42e805712fde3087829d9400e767384de7a7f Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 9 Dec 2024 15:45:12 +0100 Subject: [PATCH 109/158] Do not index a field count when no word is counted --- .../extract/searchable/extract_word_docids.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs index 06fb747c6..5e85eb1c8 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -28,7 +28,7 @@ pub struct WordDocidsBalancedCaches<'extractor> { exact_word_docids: BalancedCaches<'extractor>, word_position_docids: BalancedCaches<'extractor>, fid_word_count_docids: BalancedCaches<'extractor>, - fid_word_count: HashMap, + fid_word_count: HashMap, Option)>, current_docid: Option, } @@ -85,8 +85,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> { self.fid_word_count .entry(field_id) - .and_modify(|(_current_count, new_count)| *new_count += 1) - .or_insert((0, 1)); + .and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1) + .or_insert((None, Some(1))); self.current_docid = Some(docid); Ok(()) @@ -130,8 +130,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> { self.fid_word_count .entry(field_id) - .and_modify(|(current_count, _new_count)| *current_count += 1) - .or_insert((1, 0)); + .and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1) + .or_insert((Some(1), None)); self.current_docid = Some(docid); @@ -141,14 +141,18 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> { fn flush_fid_word_count(&mut self, buffer: &mut BumpVec) -> Result<()> { for (fid, (current_count, new_count)) in self.fid_word_count.drain() { if current_count != new_count { - if current_count <= MAX_COUNTED_WORDS { + if let Some(current_count) = + current_count.filter(|current_count| *current_count <= MAX_COUNTED_WORDS) + { buffer.clear(); buffer.extend_from_slice(&fid.to_be_bytes()); buffer.push(current_count as u8); self.fid_word_count_docids .insert_del_u32(buffer, self.current_docid.unwrap())?; } - if new_count <= MAX_COUNTED_WORDS { + if let Some(new_count) = + new_count.filter(|new_count| *new_count <= MAX_COUNTED_WORDS) + { buffer.clear(); buffer.extend_from_slice(&fid.to_be_bytes()); buffer.push(new_count as u8); From 7cf6707ed3d19ff38819f2b824c546b3d64f960b Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 10 Dec 2024 11:05:42 +0100 Subject: [PATCH 110/158] Extend test to add the ==512 bytes case --- .../tests/documents/add_documents.rs | 85 ++++++++++++++++++- 1 file changed, 81 insertions(+), 4 deletions(-) diff --git a/crates/meilisearch/tests/documents/add_documents.rs b/crates/meilisearch/tests/documents/add_documents.rs index 750bf7ae9..d72b1a7a8 100644 --- a/crates/meilisearch/tests/documents/add_documents.rs +++ b/crates/meilisearch/tests/documents/add_documents.rs @@ -1264,15 +1264,18 @@ async fn error_add_documents_bad_document_id() { let server = Server::new().await; let index = server.index("test"); index.create(Some("docid")).await; + + // unsupported characters + let documents = json!([ { "docid": "foo & bar", "content": "foobar" } ]); - index.add_documents(documents, None).await; -
index.wait_task(1).await; - let (response, code) = index.get_task(1).await; + let (value, _code) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + let (response, code) = index.get_task(value.uid()).await; snapshot!(code, @"200 OK"); snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###" @@ -1288,7 +1291,81 @@ async fn error_add_documents_bad_document_id() { "indexedDocuments": 0 }, "error": { - "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", + "message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", + "code": "invalid_document_id", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_id" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // More than 512 bytes + let documents = json!([ + { + "docid": "a".repeat(600), + "content": "foobar" + } + ]); + let (value, _code) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + let (response, code) = index.get_task(value.uid()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), + @r###" + { + "uid": 2, + "batchUid": 2, + "indexUid": "test", + "status": "failed", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 0 + }, + "error": { + "message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. 
A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", + "code": "invalid_document_id", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_document_id" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // Exactly 512 bytes + let documents = json!([ + { + "docid": "a".repeat(512), + "content": "foobar" + } + ]); + let (value, _code) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await; + let (response, code) = index.get_task(value.uid()).await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), + @r###" + { + "uid": 3, + "batchUid": 3, + "indexUid": "test", + "status": "failed", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 0 + }, + "error": { + "message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "code": "invalid_document_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_id" From e610af36aadb429c4cba3599d15e22463ba21e3c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 10 Dec 2024 11:06:24 +0100 Subject: [PATCH 111/158] User failure for documents with docid of ==512 bytes --- crates/milli/src/documents/primary_key.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/documents/primary_key.rs b/crates/milli/src/documents/primary_key.rs index fb8b3d027..c1dd9a9b8 100644 --- a/crates/milli/src/documents/primary_key.rs +++ b/crates/milli/src/documents/primary_key.rs @@ -280,7 +280,7 @@ fn starts_with(selector: &str, key: &str) -> bool { pub fn validate_document_id_str(document_id: &str) -> Option<&str> { if document_id.is_empty() - || document_id.len() > 512 + || document_id.len() >= 512 || !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') { None From 866ac91be3c38d83535e2b3b58a3b90238fa8960 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 10 Dec 2024 11:06:58 +0100 Subject: [PATCH 112/158] Fix error messages --- crates/index-scheduler/src/error.rs | 5 +++-- crates/meilisearch-types/src/error.rs | 2 +- crates/milli/src/error.rs | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index 5fb04828c..f6ee1f685 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -1,12 +1,13 @@ use std::fmt::Display; -use crate::TaskId; use meilisearch_types::batches::BatchId; use meilisearch_types::error::{Code, ErrorCode}; 
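// The `511 bytes` wording in the messages updated by this patch follows from the
// validation tightened in the previous patch, where `document_id.len() >= 512` now
// rejects the id, so the longest accepted document id is 511 bytes. A minimal sketch of
// that boundary (the helper name is illustrative, the checks mirror
// `validate_document_id_str`):
//
//     fn is_valid_docid(id: &str) -> bool {
//         !id.is_empty()
//             && id.len() < 512
//             && id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
//     }
//
//     assert!(is_valid_docid(&"a".repeat(511)));  // 511 bytes: accepted
//     assert!(!is_valid_docid(&"a".repeat(512))); // exactly 512 bytes: rejected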
use meilisearch_types::tasks::{Kind, Status}; use meilisearch_types::{heed, milli}; use thiserror::Error; +use crate::TaskId; + #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum DateField { BeforeEnqueuedAt, @@ -103,7 +104,7 @@ pub enum Error { )] InvalidTaskCanceledBy { canceled_by: String }, #[error( - "{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes." + "{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes." )] InvalidIndexUid { index_uid: String }, #[error("Task `{0}` not found.")] diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index afc876b42..0c4027899 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -550,7 +550,7 @@ impl fmt::Display for deserr_codes::InvalidSimilarId { "the value of `id` is invalid. \ A document identifier can be of type integer or string, \ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \ - and can not be more than 512 bytes." + and can not be more than 511 bytes." ) } } diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index a6774a7bd..2bd57bba5 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -114,7 +114,7 @@ pub enum UserError { "Document identifier `{}` is invalid. \ A document identifier can be of type integer or string, \ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \ -and can not be more than 512 bytes.", .document_id.to_string() +and can not be more than 511 bytes.", .document_id.to_string() )] InvalidDocumentId { document_id: Value }, #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))] From 89637bcaafc43a353d825a7478b3c3b58111e5d8 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 10 Dec 2024 11:12:27 +0100 Subject: [PATCH 113/158] Use bumparaw-collections in Meilisearch/milli --- Cargo.lock | 33 ++++++----- crates/index-scheduler/Cargo.toml | 6 +- crates/meilisearch-types/Cargo.toml | 2 +- .../meilisearch-types/src/document_formats.rs | 2 +- crates/milli/Cargo.toml | 2 +- crates/milli/src/prompt/document.rs | 59 ++++++++++--------- crates/milli/src/update/new/document.rs | 2 +- crates/milli/src/update/new/extract/cache.rs | 6 +- .../extract/searchable/tokenize_document.rs | 2 +- crates/milli/src/update/new/indexer/de.rs | 11 ++-- .../update/new/indexer/document_operation.rs | 12 ++-- crates/milli/src/update/new/indexer/mod.rs | 2 +- .../src/update/new/indexer/partial_dump.rs | 5 +- .../update/new/indexer/update_by_function.rs | 2 +- .../milli/src/update/new/vector_document.rs | 2 +- 15 files changed, 78 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c2fb711e..a57391bfc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -706,6 +706,20 @@ dependencies = [ "serde", ] +[[package]] +name = "bumparaw-collections" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7495aa71334069997d1b4ff536a4a01542981774a1654d4dfb00f29db3aedcef" +dependencies = [ + "allocator-api2", + "bitpacking", + "bumpalo", + "hashbrown 0.15.1", + "serde", + "serde_json", +] + [[package]] name = "byte-unit" version = "5.1.4" @@ -2617,6 +2631,7 @@ dependencies = [ 
"big_s", "bincode", "bumpalo", + "bumparaw-collections", "crossbeam-channel", "csv", "derive_builder 0.20.0", @@ -2631,7 +2646,6 @@ dependencies = [ "meilisearch-types", "memmap2", "page_size", - "raw-collections", "rayon", "roaring", "serde", @@ -3549,6 +3563,7 @@ dependencies = [ "actix-web", "anyhow", "bumpalo", + "bumparaw-collections", "convert_case 0.6.0", "csv", "deserr", @@ -3561,7 +3576,6 @@ dependencies = [ "meili-snap", "memmap2", "milli", - "raw-collections", "roaring", "serde", "serde-cs", @@ -3618,6 +3632,7 @@ dependencies = [ "bincode", "bstr", "bumpalo", + "bumparaw-collections", "bytemuck", "byteorder", "candle-core", @@ -3656,7 +3671,6 @@ dependencies = [ "once_cell", "ordered-float", "rand", - "raw-collections", "rayon", "rayon-par-bridge", "rhai", @@ -4487,19 +4501,6 @@ dependencies = [ "rand", ] -[[package]] -name = "raw-collections" -version = "0.1.0" -source = "git+https://github.com/meilisearch/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a" -dependencies = [ - "allocator-api2", - "bitpacking", - "bumpalo", - "hashbrown 0.15.1", - "serde", - "serde_json", -] - [[package]] name = "raw-cpuid" version = "10.7.0" diff --git a/crates/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml index ad4c1b4b9..a2b9debec 100644 --- a/crates/index-scheduler/Cargo.toml +++ b/crates/index-scheduler/Cargo.toml @@ -13,6 +13,8 @@ license.workspace = true [dependencies] anyhow = "1.0.86" bincode = "1.3.3" +bumpalo = "3.16.0" +bumparaw-collections = "0.1.1" csv = "1.3.0" derive_builder = "0.20.0" dump = { path = "../dump" } @@ -21,8 +23,8 @@ file-store = { path = "../file-store" } flate2 = "1.0.30" meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } +memmap2 = "0.9.4" page_size = "0.6.0" -raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" } rayon = "1.10.0" roaring = { version = "0.10.7", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } @@ -30,7 +32,6 @@ serde_json = { version = "1.0.120", features = ["preserve_order"] } synchronoise = "1.0.1" tempfile = "3.10.1" thiserror = "1.0.61" -memmap2 = "0.9.4" time = { version = "0.3.36", features = [ "serde-well-known", "formatting", @@ -40,7 +41,6 @@ time = { version = "0.3.36", features = [ tracing = "0.1.40" ureq = "2.10.0" uuid = { version = "1.10.0", features = ["serde", "v4"] } -bumpalo = "3.16.0" [dev-dependencies] arroy = "0.5.0" diff --git a/crates/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml index aca06a018..b91689ed7 100644 --- a/crates/meilisearch-types/Cargo.toml +++ b/crates/meilisearch-types/Cargo.toml @@ -24,7 +24,7 @@ flate2 = "1.0.30" fst = "0.4.7" memmap2 = "0.9.4" milli = { path = "../milli" } -raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" } +bumparaw-collections = "0.1.1" roaring = { version = "0.10.7", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde-cs = "0.2.4" diff --git a/crates/meilisearch-types/src/document_formats.rs b/crates/meilisearch-types/src/document_formats.rs index 008be4022..c6e8ad907 100644 --- a/crates/meilisearch-types/src/document_formats.rs +++ b/crates/meilisearch-types/src/document_formats.rs @@ -4,10 +4,10 @@ use std::io::{self, BufWriter}; use std::marker::PhantomData; use bumpalo::Bump; +use bumparaw_collections::RawMap; use memmap2::Mmap; use milli::documents::Error; use milli::Object; -use raw_collections::RawMap; use 
serde::de::{SeqAccess, Visitor}; use serde::{Deserialize, Deserializer}; use serde_json::error::Category; diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index 2a959b654..ae1edd168 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -91,8 +91,8 @@ ureq = { version = "2.10.0", features = ["json"] } url = "2.5.2" rayon-par-bridge = "0.1.0" hashbrown = "0.15.0" -raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" } bumpalo = "3.16.0" +bumparaw-collections = "0.1.1" thread_local = "1.1.8" allocator-api2 = "0.2.18" rustc-hash = "2.0.0" diff --git a/crates/milli/src/prompt/document.rs b/crates/milli/src/prompt/document.rs index dea7946da..5232b6788 100644 --- a/crates/milli/src/prompt/document.rs +++ b/crates/milli/src/prompt/document.rs @@ -3,12 +3,12 @@ use std::collections::BTreeMap; use std::fmt::{self, Debug}; use bumpalo::Bump; +use bumparaw_collections::{RawMap, RawVec, Value}; use liquid::model::{ ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State, Value as LiquidValue, }; use liquid::{ObjectView, ValueView}; -use raw_collections::{RawMap, RawVec}; use serde_json::value::RawValue; use crate::update::del_add::{DelAdd, KvReaderDelAdd}; @@ -245,12 +245,12 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, #[derive(Debug)] struct ParseableValue<'doc> { - value: raw_collections::Value<'doc>, + value: Value<'doc>, } impl<'doc> ParseableValue<'doc> { pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self { - let value = raw_collections::Value::from_raw_value(value, doc_alloc).unwrap(); + let value = Value::from_raw_value(value, doc_alloc).unwrap(); Self { value } } @@ -447,8 +447,9 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn render(&self) -> DisplayCow<'_> { - use raw_collections::value::Number; - use raw_collections::Value; + use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Null => LiquidValue::Nil.render(), Value::Bool(v) => v.render(), @@ -464,8 +465,9 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn source(&self) -> DisplayCow<'_> { - use raw_collections::value::Number; - use raw_collections::Value; + use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Null => LiquidValue::Nil.source(), Value::Bool(v) => ValueView::source(v), @@ -481,8 +483,9 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn type_name(&self) -> &'static str { - use raw_collections::value::Number; - use raw_collections::Value; + use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Null => LiquidValue::Nil.type_name(), Value::Bool(v) => v.type_name(), @@ -498,7 +501,8 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn query_state(&self, state: State) -> bool { - use raw_collections::Value; + use bumparaw_collections::Value; + match &self.value { Value::Null => ValueView::query_state(&LiquidValue::Nil, state), Value::Bool(v) => ValueView::query_state(v, state), @@ -515,7 +519,8 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn to_kstr(&self) -> KStringCow<'_> { - use raw_collections::Value; + use bumparaw_collections::Value; + match &self.value { Value::Null => ValueView::to_kstr(&LiquidValue::Nil), Value::Bool(v) => ValueView::to_kstr(v), @@ -527,12 +532,14 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn to_value(&self) -> LiquidValue { - use raw_collections::Value; + 
use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Null => LiquidValue::Nil, Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)), Value::Number(number) => match number { - raw_collections::value::Number::PosInt(number) => { + Number::PosInt(number) => { let number: i64 = match (*number).try_into() { Ok(number) => number, Err(_) => { @@ -541,12 +548,8 @@ impl<'doc> ValueView for ParseableValue<'doc> { }; LiquidValue::Scalar(ScalarCow::new(number)) } - raw_collections::value::Number::NegInt(number) => { - LiquidValue::Scalar(ScalarCow::new(*number)) - } - raw_collections::value::Number::Finite(number) => { - LiquidValue::Scalar(ScalarCow::new(*number)) - } + Number::NegInt(number) => LiquidValue::Scalar(ScalarCow::new(*number)), + Number::Finite(number) => LiquidValue::Scalar(ScalarCow::new(*number)), }, Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())), Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(), @@ -555,8 +558,9 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn as_scalar(&self) -> Option> { - use raw_collections::value::Number; - use raw_collections::Value; + use bumparaw_collections::value::Number; + use bumparaw_collections::Value; + match &self.value { Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)), Value::Number(number) => match number { @@ -576,34 +580,35 @@ impl<'doc> ValueView for ParseableValue<'doc> { } fn is_scalar(&self) -> bool { - use raw_collections::Value; + use bumparaw_collections::Value; + matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_)) } fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> { - if let raw_collections::Value::Array(array) = &self.value { + if let Value::Array(array) = &self.value { return Some(ParseableArray::as_parseable(array) as _); } None } fn is_array(&self) -> bool { - matches!(&self.value, raw_collections::Value::Array(_)) + matches!(&self.value, bumparaw_collections::Value::Array(_)) } fn as_object(&self) -> Option<&dyn ObjectView> { - if let raw_collections::Value::Object(object) = &self.value { + if let Value::Object(object) = &self.value { return Some(ParseableMap::as_parseable(object) as _); } None } fn is_object(&self) -> bool { - matches!(&self.value, raw_collections::Value::Object(_)) + matches!(&self.value, bumparaw_collections::Value::Object(_)) } fn is_nil(&self) -> bool { - matches!(&self.value, raw_collections::Value::Null) + matches!(&self.value, bumparaw_collections::Value::Null) } } diff --git a/crates/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs index b1a2218f2..2beefc7d5 100644 --- a/crates/milli/src/update/new/document.rs +++ b/crates/milli/src/update/new/document.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeMap, BTreeSet}; +use bumparaw_collections::RawMap; use heed::RoTxn; -use raw_collections::RawMap; use serde_json::value::RawValue; use super::vector_document::VectorDocument; diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index 658a3127c..09ca60211 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -69,12 +69,12 @@ use std::io::BufReader; use std::{io, iter, mem}; use bumpalo::Bump; +use bumparaw_collections::bbbul::{BitPacker, BitPacker4x}; +use bumparaw_collections::map::FrozenMap; +use bumparaw_collections::{Bbbul, FrozenBbbul}; use grenad::ReaderCursor; use 
hashbrown::hash_map::RawEntryMut; use hashbrown::HashMap; -use raw_collections::bbbul::{BitPacker, BitPacker4x}; -use raw_collections::map::FrozenMap; -use raw_collections::{Bbbul, FrozenBbbul}; use roaring::RoaringBitmap; use rustc_hash::FxBuildHasher; diff --git a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs index ffdce5b7e..3aa546272 100644 --- a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs +++ b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs @@ -176,9 +176,9 @@ pub fn tokenizer_builder<'a>( #[cfg(test)] mod test { use bumpalo::Bump; + use bumparaw_collections::RawMap; use charabia::TokenizerBuilder; use meili_snap::snapshot; - use raw_collections::RawMap; use serde_json::json; use serde_json::value::RawValue; diff --git a/crates/milli/src/update/new/indexer/de.rs b/crates/milli/src/update/new/indexer/de.rs index c9808360e..7fd983f29 100644 --- a/crates/milli/src/update/new/indexer/de.rs +++ b/crates/milli/src/update/new/indexer/de.rs @@ -1,6 +1,7 @@ use std::ops::ControlFlow; use bumpalo::Bump; +use bumparaw_collections::RawVec; use serde::de::{DeserializeSeed, Deserializer as _, Visitor}; use serde_json::value::RawValue; @@ -360,7 +361,7 @@ impl<'a> DeserrRawValue<'a> { } pub struct DeserrRawVec<'a> { - vec: raw_collections::RawVec<'a>, + vec: RawVec<'a>, alloc: &'a Bump, } @@ -379,7 +380,7 @@ impl<'a> deserr::Sequence for DeserrRawVec<'a> { } pub struct DeserrRawVecIter<'a> { - it: raw_collections::vec::iter::IntoIter<'a>, + it: bumparaw_collections::vec::iter::IntoIter<'a>, alloc: &'a Bump, } @@ -393,7 +394,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> { } pub struct DeserrRawMap<'a> { - map: raw_collections::RawMap<'a>, + map: bumparaw_collections::RawMap<'a>, alloc: &'a Bump, } @@ -416,7 +417,7 @@ impl<'a> deserr::Map for DeserrRawMap<'a> { } pub struct DeserrRawMapIter<'a> { - it: raw_collections::map::iter::IntoIter<'a>, + it: bumparaw_collections::map::iter::IntoIter<'a>, alloc: &'a Bump, } @@ -615,7 +616,7 @@ impl<'de> Visitor<'de> for DeserrRawValueVisitor<'de> { where A: serde::de::SeqAccess<'de>, { - let mut raw_vec = raw_collections::RawVec::new_in(self.alloc); + let mut raw_vec = RawVec::new_in(self.alloc); while let Some(next) = seq.next_element()? 
{ raw_vec.push(next); } diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs index 2a381d5d1..139cef11b 100644 --- a/crates/milli/src/update/new/indexer/document_operation.rs +++ b/crates/milli/src/update/new/indexer/document_operation.rs @@ -1,9 +1,9 @@ use bumpalo::collections::CollectIn; use bumpalo::Bump; +use bumparaw_collections::RawMap; use hashbrown::hash_map::Entry; use heed::RoTxn; use memmap2::Mmap; -use raw_collections::RawMap; use rayon::slice::ParallelSlice; use serde_json::value::RawValue; use serde_json::Deserializer; @@ -545,8 +545,8 @@ impl MergeChanges for MergeDocumentForReplacement { match operations.last() { Some(InnerDocOp::Addition(DocumentOffset { content })) => { let document = serde_json::from_slice(content).unwrap(); - let document = raw_collections::RawMap::from_raw_value(document, doc_alloc) - .map_err(UserError::SerdeJson)?; + let document = + RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?; if is_new { Ok(Some(DocumentChange::Insertion(Insertion::create( @@ -632,8 +632,8 @@ impl MergeChanges for MergeDocumentForUpdates { } }; let document = serde_json::from_slice(content).unwrap(); - let document = raw_collections::RawMap::from_raw_value(document, doc_alloc) - .map_err(UserError::SerdeJson)?; + let document = + RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?; Some(Versions::single(document)) } @@ -647,7 +647,7 @@ impl MergeChanges for MergeDocumentForUpdates { }; let document = serde_json::from_slice(content).unwrap(); - let document = raw_collections::RawMap::from_raw_value(document, doc_alloc) + let document = RawMap::from_raw_value(document, doc_alloc) .map_err(UserError::SerdeJson)?; Ok(document) }); diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 59088bd47..00041ecaf 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -4,6 +4,7 @@ use std::sync::{OnceLock, RwLock}; use std::thread::{self, Builder}; use big_s::S; +use bumparaw_collections::RawMap; use document_changes::{extract, DocumentChanges, IndexingContext, Progress}; pub use document_deletion::DocumentDeletion; pub use document_operation::{DocumentOperation, PayloadStats}; @@ -13,7 +14,6 @@ use heed::{RoTxn, RwTxn}; use itertools::{merge_join_by, EitherOrBoth}; pub use partial_dump::PartialDump; use rand::SeedableRng as _; -use raw_collections::RawMap; use time::OffsetDateTime; pub use update_by_function::UpdateByFunction; diff --git a/crates/milli/src/update/new/indexer/partial_dump.rs b/crates/milli/src/update/new/indexer/partial_dump.rs index 2cc653813..f687fda99 100644 --- a/crates/milli/src/update/new/indexer/partial_dump.rs +++ b/crates/milli/src/update/new/indexer/partial_dump.rs @@ -1,5 +1,6 @@ use std::ops::DerefMut; +use bumparaw_collections::RawMap; use rayon::iter::IndexedParallelIterator; use serde_json::value::RawValue; @@ -75,8 +76,8 @@ where self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?; let external_document_id = external_document_id.to_de(); - let document = raw_collections::RawMap::from_raw_value(document, doc_alloc) - .map_err(InternalError::SerdeJson)?; + let document = + RawMap::from_raw_value(document, doc_alloc).map_err(InternalError::SerdeJson)?; let insertion = Insertion::create(docid, external_document_id, Versions::single(document)); Ok(Some(DocumentChange::Insertion(insertion))) diff --git 
a/crates/milli/src/update/new/indexer/update_by_function.rs b/crates/milli/src/update/new/indexer/update_by_function.rs index a8e3e38a8..59d7098e5 100644 --- a/crates/milli/src/update/new/indexer/update_by_function.rs +++ b/crates/milli/src/update/new/indexer/update_by_function.rs @@ -1,4 +1,4 @@ -use raw_collections::RawMap; +use bumparaw_collections::RawMap; use rayon::iter::IndexedParallelIterator; use rayon::slice::ParallelSlice as _; use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST}; diff --git a/crates/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs index 319730db0..419c3dc05 100644 --- a/crates/milli/src/update/new/vector_document.rs +++ b/crates/milli/src/update/new/vector_document.rs @@ -1,9 +1,9 @@ use std::collections::BTreeSet; use bumpalo::Bump; +use bumparaw_collections::RawMap; use deserr::{Deserr, IntoValue}; use heed::RoTxn; -use raw_collections::RawMap; use serde::Serialize; use serde_json::value::RawValue; From d075be798a5ec5086c42adb4882e0917a221fa93 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 10 Dec 2024 11:07:10 +0100 Subject: [PATCH 114/158] Fix tests --- crates/meilisearch/tests/documents/update_documents.rs | 2 +- crates/meilisearch/tests/similar/errors.rs | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/meilisearch/tests/documents/update_documents.rs b/crates/meilisearch/tests/documents/update_documents.rs index c0703e81b..aaf529ce5 100644 --- a/crates/meilisearch/tests/documents/update_documents.rs +++ b/crates/meilisearch/tests/documents/update_documents.rs @@ -172,7 +172,7 @@ async fn error_update_documents_bad_document_id() { assert_eq!( response["error"]["message"], json!( - r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."# + r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes."# ) ); assert_eq!(response["error"]["code"], json!("invalid_document_id")); diff --git a/crates/meilisearch/tests/similar/errors.rs b/crates/meilisearch/tests/similar/errors.rs index 1e933e1c0..86fca97ad 100644 --- a/crates/meilisearch/tests/similar/errors.rs +++ b/crates/meilisearch/tests/similar/errors.rs @@ -79,7 +79,7 @@ async fn similar_bad_id() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", + "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "code": "invalid_similar_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_id" @@ -172,7 +172,7 @@ async fn similar_invalid_id() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value at `.id`: the value of `id` is invalid. 
A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.", + "message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.", "code": "invalid_similar_id", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_id" From 6b269795d25257f34d398b8198386e3a3c768f60 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 10 Dec 2024 14:25:13 +0100 Subject: [PATCH 115/158] Update bumparaw-collections to 0.1.2 --- Cargo.lock | 4 ++-- crates/index-scheduler/Cargo.toml | 2 +- crates/meilisearch-types/Cargo.toml | 2 +- crates/milli/Cargo.toml | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a57391bfc..34bea88da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -708,9 +708,9 @@ dependencies = [ [[package]] name = "bumparaw-collections" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7495aa71334069997d1b4ff536a4a01542981774a1654d4dfb00f29db3aedcef" +checksum = "833a74d1cb25094593307c17044e4140828b553d1d653bc3ec9928aa88a6d88a" dependencies = [ "allocator-api2", "bitpacking", diff --git a/crates/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml index a2b9debec..5d7eb1913 100644 --- a/crates/index-scheduler/Cargo.toml +++ b/crates/index-scheduler/Cargo.toml @@ -14,7 +14,7 @@ license.workspace = true anyhow = "1.0.86" bincode = "1.3.3" bumpalo = "3.16.0" -bumparaw-collections = "0.1.1" +bumparaw-collections = "0.1.2" csv = "1.3.0" derive_builder = "0.20.0" dump = { path = "../dump" } diff --git a/crates/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml index b91689ed7..e81e6dd35 100644 --- a/crates/meilisearch-types/Cargo.toml +++ b/crates/meilisearch-types/Cargo.toml @@ -24,7 +24,7 @@ flate2 = "1.0.30" fst = "0.4.7" memmap2 = "0.9.4" milli = { path = "../milli" } -bumparaw-collections = "0.1.1" +bumparaw-collections = "0.1.2" roaring = { version = "0.10.7", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde-cs = "0.2.4" diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index ae1edd168..9f113e013 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -92,7 +92,7 @@ url = "2.5.2" rayon-par-bridge = "0.1.0" hashbrown = "0.15.0" bumpalo = "3.16.0" -bumparaw-collections = "0.1.1" +bumparaw-collections = "0.1.2" thread_local = "1.1.8" allocator-api2 = "0.2.18" rustc-hash = "2.0.0" From a751972c5726ff0a23dc433fd9f0702f88e153b9 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 10 Dec 2024 14:25:53 +0100 Subject: [PATCH 116/158] Prefer using a stable than a random hash builder --- crates/milli/src/update/new/document.rs | 7 ++++--- .../new/extract/searchable/tokenize_document.rs | 3 ++- crates/milli/src/update/new/indexer/de.rs | 3 ++- .../update/new/indexer/document_operation.rs | 17 +++++++++++------ crates/milli/src/update/new/indexer/mod.rs | 3 ++- .../src/update/new/indexer/partial_dump.rs | 5 +++-- .../update/new/indexer/update_by_function.rs | 9 +++++++-- crates/milli/src/update/new/vector_document.rs | 16 +++++++++------- 8 files changed, 40 insertions(+), 23 deletions(-) diff --git a/crates/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs index 
2beefc7d5..930b0c078 100644 --- a/crates/milli/src/update/new/document.rs +++ b/crates/milli/src/update/new/document.rs @@ -2,6 +2,7 @@ use std::collections::{BTreeMap, BTreeSet}; use bumparaw_collections::RawMap; use heed::RoTxn; +use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; use super::vector_document::VectorDocument; @@ -385,12 +386,12 @@ pub type Entry<'doc> = (&'doc str, &'doc RawValue); #[derive(Debug)] pub struct Versions<'doc> { - data: RawMap<'doc>, + data: RawMap<'doc, FxBuildHasher>, } impl<'doc> Versions<'doc> { pub fn multiple( - mut versions: impl Iterator>>, + mut versions: impl Iterator>>, ) -> Result> { let Some(data) = versions.next() else { return Ok(None) }; let mut data = data?; @@ -403,7 +404,7 @@ impl<'doc> Versions<'doc> { Ok(Some(Self::single(data))) } - pub fn single(version: RawMap<'doc>) -> Self { + pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self { Self { data: version } } diff --git a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs index 3aa546272..1c1605b66 100644 --- a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs +++ b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs @@ -179,6 +179,7 @@ mod test { use bumparaw_collections::RawMap; use charabia::TokenizerBuilder; use meili_snap::snapshot; + use rustc_hash::FxBuildHasher; use serde_json::json; use serde_json::value::RawValue; @@ -234,7 +235,7 @@ mod test { let bump = Bump::new(); let document: &RawValue = serde_json::from_str(&document).unwrap(); - let document = RawMap::from_raw_value(document, &bump).unwrap(); + let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, &bump).unwrap(); let document = Versions::single(document); let document = DocumentFromVersions::new(&document); diff --git a/crates/milli/src/update/new/indexer/de.rs b/crates/milli/src/update/new/indexer/de.rs index 7fd983f29..4d9fa40a1 100644 --- a/crates/milli/src/update/new/indexer/de.rs +++ b/crates/milli/src/update/new/indexer/de.rs @@ -2,6 +2,7 @@ use std::ops::ControlFlow; use bumpalo::Bump; use bumparaw_collections::RawVec; +use rustc_hash::FxBuildHasher; use serde::de::{DeserializeSeed, Deserializer as _, Visitor}; use serde_json::value::RawValue; @@ -394,7 +395,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> { } pub struct DeserrRawMap<'a> { - map: bumparaw_collections::RawMap<'a>, + map: bumparaw_collections::RawMap<'a, FxBuildHasher>, alloc: &'a Bump, } diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs index 139cef11b..0b7ec493e 100644 --- a/crates/milli/src/update/new/indexer/document_operation.rs +++ b/crates/milli/src/update/new/indexer/document_operation.rs @@ -5,6 +5,7 @@ use hashbrown::hash_map::Entry; use heed::RoTxn; use memmap2::Mmap; use rayon::slice::ParallelSlice; +use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; use serde_json::Deserializer; @@ -166,8 +167,9 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>( // Only guess the primary key if it is the first document let retrieved_primary_key = if previous_offset == 0 { - let doc = - RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?; + let doc = RawMap::from_raw_value_and_hasher(doc, FxBuildHasher, indexer) + .map(Some) + .map_err(UserError::SerdeJson)?; let result = retrieve_or_guess_primary_key( rtxn, @@ -546,7 +548,8 @@ impl MergeChanges for MergeDocumentForReplacement 
{ Some(InnerDocOp::Addition(DocumentOffset { content })) => { let document = serde_json::from_slice(content).unwrap(); let document = - RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?; + RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) + .map_err(UserError::SerdeJson)?; if is_new { Ok(Some(DocumentChange::Insertion(Insertion::create( @@ -633,7 +636,8 @@ impl MergeChanges for MergeDocumentForUpdates { }; let document = serde_json::from_slice(content).unwrap(); let document = - RawMap::from_raw_value(document, doc_alloc).map_err(UserError::SerdeJson)?; + RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) + .map_err(UserError::SerdeJson)?; Some(Versions::single(document)) } @@ -647,8 +651,9 @@ impl MergeChanges for MergeDocumentForUpdates { }; let document = serde_json::from_slice(content).unwrap(); - let document = RawMap::from_raw_value(document, doc_alloc) - .map_err(UserError::SerdeJson)?; + let document = + RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) + .map_err(UserError::SerdeJson)?; Ok(document) }); Versions::multiple(versions)? diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 00041ecaf..601645385 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -14,6 +14,7 @@ use heed::{RoTxn, RwTxn}; use itertools::{merge_join_by, EitherOrBoth}; pub use partial_dump::PartialDump; use rand::SeedableRng as _; +use rustc_hash::FxBuildHasher; use time::OffsetDateTime; pub use update_by_function::UpdateByFunction; @@ -776,7 +777,7 @@ pub fn retrieve_or_guess_primary_key<'a>( index: &Index, new_fields_ids_map: &mut FieldsIdsMap, primary_key_from_op: Option<&'a str>, - first_document: Option>, + first_document: Option>, ) -> Result, bool), UserError>> { // make sure that we have a declared primary key, either fetching it from the index or attempting to guess it. 
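Every hunk in this commit applies the same substitution: `RawMap::from_raw_value(raw, alloc)` becomes `RawMap::from_raw_value_and_hasher(raw, FxBuildHasher, alloc)`, so document fields are hashed with a stable, seed-free hasher instead of a randomly seeded one, keeping hashing deterministic across runs and threads. A minimal sketch of the resulting call shape, assuming the bumparaw-collections API used in this series and that the parse error is a plain `serde_json::Error` (the real call sites wrap it into milli's own error types):

use bumpalo::Bump;
use bumparaw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;

// Parse one raw JSON document into a bump-allocated map keyed with the
// stable FxBuildHasher, mirroring the calls introduced by this commit.
// The `serde_json::Error` return type is an assumption made for this sketch.
fn parse_document<'doc>(
    raw: &'doc RawValue,
    doc_alloc: &'doc Bump,
) -> Result<RawMap<'doc, FxBuildHasher>, serde_json::Error> {
    RawMap::from_raw_value_and_hasher(raw, FxBuildHasher, doc_alloc)
}

The same three-argument call is what document_operation.rs, partial_dump.rs, update_by_function.rs and vector_document.rs switch to in the hunks that follow.
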
diff --git a/crates/milli/src/update/new/indexer/partial_dump.rs b/crates/milli/src/update/new/indexer/partial_dump.rs index f687fda99..6e4abd898 100644 --- a/crates/milli/src/update/new/indexer/partial_dump.rs +++ b/crates/milli/src/update/new/indexer/partial_dump.rs @@ -2,6 +2,7 @@ use std::ops::DerefMut; use bumparaw_collections::RawMap; use rayon::iter::IndexedParallelIterator; +use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; use super::document_changes::{DocumentChangeContext, DocumentChanges}; @@ -76,8 +77,8 @@ where self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?; let external_document_id = external_document_id.to_de(); - let document = - RawMap::from_raw_value(document, doc_alloc).map_err(InternalError::SerdeJson)?; + let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) + .map_err(InternalError::SerdeJson)?; let insertion = Insertion::create(docid, external_document_id, Versions::single(document)); Ok(Some(DocumentChange::Insertion(insertion))) diff --git a/crates/milli/src/update/new/indexer/update_by_function.rs b/crates/milli/src/update/new/indexer/update_by_function.rs index 59d7098e5..3001648e6 100644 --- a/crates/milli/src/update/new/indexer/update_by_function.rs +++ b/crates/milli/src/update/new/indexer/update_by_function.rs @@ -3,6 +3,7 @@ use rayon::iter::IndexedParallelIterator; use rayon::slice::ParallelSlice as _; use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST}; use roaring::RoaringBitmap; +use rustc_hash::FxBuildHasher; use super::document_changes::DocumentChangeContext; use super::DocumentChanges; @@ -160,8 +161,12 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> { if document_id != new_document_id { Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey)) } else { - let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc) - .map_err(InternalError::SerdeJson)?; + let raw_new_doc = RawMap::from_raw_value_and_hasher( + raw_new_doc, + FxBuildHasher, + doc_alloc, + ) + .map_err(InternalError::SerdeJson)?; Ok(Some(DocumentChange::Update(Update::create( docid, diff --git a/crates/milli/src/update/new/vector_document.rs b/crates/milli/src/update/new/vector_document.rs index 419c3dc05..8d14a749d 100644 --- a/crates/milli/src/update/new/vector_document.rs +++ b/crates/milli/src/update/new/vector_document.rs @@ -4,6 +4,7 @@ use bumpalo::Bump; use bumparaw_collections::RawMap; use deserr::{Deserr, IntoValue}; use heed::RoTxn; +use rustc_hash::FxBuildHasher; use serde::Serialize; use serde_json::value::RawValue; @@ -84,7 +85,7 @@ pub struct VectorDocumentFromDb<'t> { docid: DocumentId, embedding_config: Vec, index: &'t Index, - vectors_field: Option>, + vectors_field: Option>, rtxn: &'t RoTxn<'t>, doc_alloc: &'t Bump, } @@ -102,9 +103,10 @@ impl<'t> VectorDocumentFromDb<'t> { }; let vectors = document.vectors_field()?; let vectors_field = match vectors { - Some(vectors) => { - Some(RawMap::from_raw_value(vectors, doc_alloc).map_err(InternalError::SerdeJson)?) 
- } + Some(vectors) => Some( + RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc) + .map_err(InternalError::SerdeJson)?, + ), None => None, }; @@ -220,7 +222,7 @@ fn entry_from_raw_value( pub struct VectorDocumentFromVersions<'doc> { external_document_id: &'doc str, - vectors: RawMap<'doc>, + vectors: RawMap<'doc, FxBuildHasher>, embedders: &'doc EmbeddingConfigs, } @@ -233,8 +235,8 @@ impl<'doc> VectorDocumentFromVersions<'doc> { ) -> Result> { let document = DocumentFromVersions::new(versions); if let Some(vectors_field) = document.vectors_field()? { - let vectors = - RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?; + let vectors = RawMap::from_raw_value_and_hasher(vectors_field, FxBuildHasher, bump) + .map_err(UserError::SerdeJson)?; Ok(Some(Self { external_document_id, vectors, embedders })) } else { Ok(None) From aeb6b74725b3eecda3eecec20b4f37d815dc929c Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 10 Dec 2024 15:52:22 +0100 Subject: [PATCH 117/158] Make sure we use an FxHashBuilder on the Value --- Cargo.lock | 4 ++-- crates/milli/src/prompt/document.rs | 24 +++++++++++++++--------- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 34bea88da..9476506ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -708,9 +708,9 @@ dependencies = [ [[package]] name = "bumparaw-collections" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833a74d1cb25094593307c17044e4140828b553d1d653bc3ec9928aa88a6d88a" +checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8" dependencies = [ "allocator-api2", "bitpacking", diff --git a/crates/milli/src/prompt/document.rs b/crates/milli/src/prompt/document.rs index 5232b6788..ae0a506ac 100644 --- a/crates/milli/src/prompt/document.rs +++ b/crates/milli/src/prompt/document.rs @@ -9,6 +9,7 @@ use liquid::model::{ Value as LiquidValue, }; use liquid::{ObjectView, ValueView}; +use rustc_hash::FxBuildHasher; use serde_json::value::RawValue; use crate::update::del_add::{DelAdd, KvReaderDelAdd}; @@ -195,7 +196,7 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc } impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> { - fn as_debug(&self) -> &dyn fmt::Debug { + fn as_debug(&self) -> &dyn Debug { self } fn render(&self) -> liquid::model::DisplayCow<'_> { @@ -243,14 +244,13 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, } } -#[derive(Debug)] struct ParseableValue<'doc> { - value: Value<'doc>, + value: Value<'doc, FxBuildHasher>, } impl<'doc> ParseableValue<'doc> { pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self { - let value = Value::from_raw_value(value, doc_alloc).unwrap(); + let value = Value::from_raw_value_and_hasher(value, FxBuildHasher, doc_alloc).unwrap(); Self { value } } @@ -260,19 +260,19 @@ impl<'doc> ParseableValue<'doc> { } // transparent newtype for implementing ValueView -#[repr(transparent)] #[derive(Debug)] -struct ParseableMap<'doc>(RawMap<'doc>); +#[repr(transparent)] +struct ParseableMap<'doc>(RawMap<'doc, FxBuildHasher>); // transparent newtype for implementing ValueView -#[repr(transparent)] #[derive(Debug)] +#[repr(transparent)] struct ParseableArray<'doc>(RawVec<'doc>); impl<'doc> ParseableMap<'doc> { - pub fn as_parseable<'a>(map: &'a RawMap<'doc>) -> &'a ParseableMap<'doc> { + pub fn as_parseable<'a>(map: &'a RawMap<'doc, FxBuildHasher>) -> &'a 
ParseableMap<'doc> { // SAFETY: repr(transparent) - unsafe { &*(map as *const RawMap as *const Self) } + unsafe { &*(map as *const RawMap as *const Self) } } } @@ -612,6 +612,12 @@ impl<'doc> ValueView for ParseableValue<'doc> { } } +impl Debug for ParseableValue<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ParseableValue").field("value", &self.value).finish() + } +} + struct ArraySource<'s, 'doc> { s: &'s RawVec<'doc>, } From bb00e70087a58328dc1062ddc766c68097aeadd2 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 11 Dec 2024 10:39:04 +0100 Subject: [PATCH 118/158] Reintroduce the document addition logs --- crates/index-scheduler/src/batch.rs | 35 ++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index 9a3ba4929..93e9a1404 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -33,7 +33,9 @@ use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; use meilisearch_types::milli::heed::CompactionOption; use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; -use meilisearch_types::milli::update::{IndexDocumentsMethod, Settings as MilliSettings}; +use meilisearch_types::milli::update::{ + DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings, +}; use meilisearch_types::milli::vector::parsed_vectors::{ ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME, }; @@ -1310,9 +1312,9 @@ impl IndexScheduler { ) .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; - let mut addition = 0; + let mut candidates_count = 0; for (stats, task) in operation_stats.into_iter().zip(&mut tasks) { - addition += stats.document_count; + candidates_count += stats.document_count; match stats.error { Some(error) => { task.status = Status::Failed; @@ -1358,6 +1360,13 @@ impl IndexScheduler { ) .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + let addition = DocumentAdditionResult { + indexed_documents: candidates_count, + number_of_documents: index + .number_of_documents(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + }; + tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); } @@ -1436,6 +1445,7 @@ impl IndexScheduler { } }; + let candidates_count = candidates.len(); let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone()); let document_changes = pool .install(|| { @@ -1464,7 +1474,14 @@ impl IndexScheduler { ) .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - // tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); + let addition = DocumentAdditionResult { + indexed_documents: candidates_count, + number_of_documents: index + .number_of_documents(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + }; + + tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); } match result_count { @@ -1585,6 +1602,7 @@ impl IndexScheduler { }; let mut indexer = indexer::DocumentDeletion::new(); + let candidates_count = to_delete.len(); indexer.delete_documents_by_docids(to_delete); let document_changes = indexer.into_changes(&indexer_alloc, primary_key); let embedders = index @@ -1607,7 
+1625,14 @@ impl IndexScheduler { ) .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; - // tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); + let addition = DocumentAdditionResult { + indexed_documents: candidates_count, + number_of_documents: index + .number_of_documents(index_wtxn) + .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?, + }; + + tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done"); } Ok(tasks) From 479607e5dd9185a1a69ec39f05a6c97be8e87c98 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 10 Dec 2024 15:12:50 +0100 Subject: [PATCH 119/158] Convert update files from OBKV to ndjson --- Cargo.lock | 13 +++--- crates/meilitool/Cargo.toml | 5 ++- crates/meilitool/src/main.rs | 2 +- crates/meilitool/src/upgrade/mod.rs | 5 +++ crates/meilitool/src/upgrade/v1_12.rs | 63 +++++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 crates/meilitool/src/upgrade/v1_12.rs diff --git a/Cargo.lock b/Cargo.lock index 9476506ec..ae2715f25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2661,12 +2661,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", - "hashbrown 0.14.3", + "hashbrown 0.15.1", "serde", ] @@ -3597,9 +3597,12 @@ dependencies = [ "clap", "dump", "file-store", + "indexmap", "meilisearch-auth", "meilisearch-types", "serde", + "serde_json", + "tempfile", "time", "uuid", ] @@ -4969,9 +4972,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.132" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ "indexmap", "itoa", diff --git a/crates/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml index 048da6232..7d0b9f32c 100644 --- a/crates/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -10,12 +10,15 @@ license.workspace = true [dependencies] anyhow = "1.0.86" +arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" } clap = { version = "4.5.9", features = ["derive"] } dump = { path = "../dump" } file-store = { path = "../file-store" } +indexmap = {version = "2.7.0", features = ["serde"]} meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-types = { path = "../meilisearch-types" } serde = { version = "1.0.209", features = ["derive"] } +serde_json = {version = "1.0.133", features = ["preserve_order"]} +tempfile = "3.14.0" time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } -arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" } diff --git a/crates/meilitool/src/main.rs b/crates/meilitool/src/main.rs index f84cea98d..44eb4960e 100644 --- a/crates/meilitool/src/main.rs +++ b/crates/meilitool/src/main.rs @@ -73,7 +73,7 @@ enum Command { /// /// Supported upgrade paths: /// - /// - v1.9.x -> v1.10.x -> v1.11.x + /// - 
v1.9.x -> v1.10.x -> v1.11.x -> v1.12.x OfflineUpgrade { #[arg(long)] target_version: String, diff --git a/crates/meilitool/src/upgrade/mod.rs b/crates/meilitool/src/upgrade/mod.rs index 36630c3b3..50882f610 100644 --- a/crates/meilitool/src/upgrade/mod.rs +++ b/crates/meilitool/src/upgrade/mod.rs @@ -1,5 +1,6 @@ mod v1_10; mod v1_11; +mod v1_12; mod v1_9; use std::path::{Path, PathBuf}; @@ -8,6 +9,7 @@ use anyhow::{bail, Context}; use meilisearch_types::versioning::create_version_file; use v1_10::v1_9_to_v1_10; +use v1_12::v1_11_to_v1_12; use crate::upgrade::v1_11::v1_10_to_v1_11; @@ -22,6 +24,7 @@ impl OfflineUpgrade { let upgrade_list = [ (v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"), (v1_10_to_v1_11, "1", "11", "0"), + (v1_11_to_v1_12, "1", "12", "0"), ]; let (current_major, current_minor, current_patch) = &self.current_version; @@ -33,6 +36,7 @@ impl OfflineUpgrade { ) { ("1", "9", _) => 0, ("1", "10", _) => 1, + ("1", "11", _) => 2, _ => { bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9 and v1.10") } @@ -43,6 +47,7 @@ impl OfflineUpgrade { let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { ("1", "10", _) => 0, ("1", "11", _) => 1, + ("1", "12", _) => 2, (major, _, _) if major.starts_with('v') => { bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.") } diff --git a/crates/meilitool/src/upgrade/v1_12.rs b/crates/meilitool/src/upgrade/v1_12.rs new file mode 100644 index 000000000..ab97f417b --- /dev/null +++ b/crates/meilitool/src/upgrade/v1_12.rs @@ -0,0 +1,63 @@ +//! The breaking changes that happened between the v1.11 and the v1.12 are: +//! - The new indexer changed the update files format from OBKV to ndjson. https://github.com/meilisearch/meilisearch/pull/4900 + +use std::{io::BufWriter, path::Path}; + +use anyhow::Context; +use file_store::FileStore; +use indexmap::IndexMap; +use meilisearch_types::milli::documents::DocumentsBatchReader; +use serde_json::value::RawValue; +use tempfile::NamedTempFile; + +pub fn v1_11_to_v1_12(db_path: &Path) -> anyhow::Result<()> { + println!("Upgrading from v1.11.0 to v1.12.0"); + + convert_update_files(db_path)?; + + Ok(()) +} + +/// Convert the update files from OBKV to ndjson format. +/// +/// 1) List all the update files using the file store. +/// 2) For each update file, read the update file into a DocumentsBatchReader. +/// 3) For each document in the update file, convert the document to a JSON object. +/// 4) Write the JSON object to a tmp file in the update files directory. +/// 5) Persist the tmp file replacing the old update file. +fn convert_update_files(db_path: &Path) -> anyhow::Result<()> { + let update_files_dir_path = db_path.join("update_files"); + let file_store = FileStore::new(&update_files_dir_path)?; + + for uuid in file_store.all_uuids()? { + let uuid = uuid?; + let update_file_path = file_store.get_update_path(uuid); + let update_file = file_store.get_update(uuid)?; + + let mut file = NamedTempFile::new_in(&update_files_dir_path).map(BufWriter::new)?; + + let reader = DocumentsBatchReader::from_reader(update_file)?; + let (mut cursor, index) = reader.into_cursor_and_fields_index(); + + while let Some(document) = cursor.next_document()? 
{ + let mut json_document = IndexMap::new(); + for (fid, value) in document { + let field_name = index + .name(fid) + .with_context(|| format!("while getting field name for fid {fid}"))?; + let value: &RawValue = serde_json::from_slice(value)?; + json_document.insert(field_name, value); + } + + serde_json::to_writer(&mut file, &json_document)?; + } + + let file = file + .into_inner() + .map_err(|e| e.into_error()) + .context("while flushing update file bufwriter")?; + let _ = file.persist(update_file_path)?; + } + + Ok(()) +} From c614d0dd353947c46de2da8635e6e4b8e0b8404c Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 11 Dec 2024 09:54:34 +0100 Subject: [PATCH 120/158] Add context when returning an error --- crates/meilitool/src/upgrade/v1_12.rs | 40 ++++++++++++++++++--------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/crates/meilitool/src/upgrade/v1_12.rs b/crates/meilitool/src/upgrade/v1_12.rs index ab97f417b..77060d90d 100644 --- a/crates/meilitool/src/upgrade/v1_12.rs +++ b/crates/meilitool/src/upgrade/v1_12.rs @@ -27,24 +27,37 @@ pub fn v1_11_to_v1_12(db_path: &Path) -> anyhow::Result<()> { /// 5) Persist the tmp file replacing the old update file. fn convert_update_files(db_path: &Path) -> anyhow::Result<()> { let update_files_dir_path = db_path.join("update_files"); - let file_store = FileStore::new(&update_files_dir_path)?; + let file_store = FileStore::new(&update_files_dir_path).with_context(|| { + format!("while creating file store for update files dir {update_files_dir_path:?}") + })?; - for uuid in file_store.all_uuids()? { - let uuid = uuid?; + for uuid in file_store.all_uuids().context("while retrieving uuids from file store")? { + let uuid = uuid.context("while retrieving uuid from file store")?; let update_file_path = file_store.get_update_path(uuid); - let update_file = file_store.get_update(uuid)?; + let update_file = file_store + .get_update(uuid) + .with_context(|| format!("while getting update file for uuid {uuid:?}"))?; - let mut file = NamedTempFile::new_in(&update_files_dir_path).map(BufWriter::new)?; + let mut file = + NamedTempFile::new_in(&update_files_dir_path).map(BufWriter::new).with_context( + || format!("while creating bufwriter for update file {update_file_path:?}"), + )?; - let reader = DocumentsBatchReader::from_reader(update_file)?; + let reader = DocumentsBatchReader::from_reader(update_file).with_context(|| { + format!("while creating documents batch reader for update file {update_file_path:?}") + })?; let (mut cursor, index) = reader.into_cursor_and_fields_index(); - while let Some(document) = cursor.next_document()? { + while let Some(document) = cursor.next_document().with_context(|| { + format!( + "while reading documents from batch reader for update file {update_file_path:?}" + ) + })? 
{ let mut json_document = IndexMap::new(); for (fid, value) in document { let field_name = index .name(fid) - .with_context(|| format!("while getting field name for fid {fid}"))?; + .with_context(|| format!("while getting field name for fid {fid} for update file {update_file_path:?}"))?; let value: &RawValue = serde_json::from_slice(value)?; json_document.insert(field_name, value); } @@ -52,11 +65,12 @@ fn convert_update_files(db_path: &Path) -> anyhow::Result<()> { serde_json::to_writer(&mut file, &json_document)?; } - let file = file - .into_inner() - .map_err(|e| e.into_error()) - .context("while flushing update file bufwriter")?; - let _ = file.persist(update_file_path)?; + let file = file.into_inner().map_err(|e| e.into_error()).context(format!( + "while flushing update file bufwriter for update file {update_file_path:?}" + ))?; + let _ = file + .persist(&update_file_path) + .with_context(|| format!("while persisting update file {update_file_path:?}"))?; } Ok(()) From d683f5980ce7232c7e9be4d0b3d3f5aefb0335af Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 10 Dec 2024 19:19:27 +0100 Subject: [PATCH 121/158] Do not duplicate NDJson when unecessary --- crates/file-store/src/lib.rs | 8 ++ .../meilisearch-types/src/document_formats.rs | 21 ++- .../src/routes/indexes/documents.rs | 127 +++++++++++------- 3 files changed, 98 insertions(+), 58 deletions(-) diff --git a/crates/file-store/src/lib.rs b/crates/file-store/src/lib.rs index c8b3849ab..39ed9482b 100644 --- a/crates/file-store/src/lib.rs +++ b/crates/file-store/src/lib.rs @@ -136,6 +136,14 @@ pub struct File { } impl File { + pub fn from_parts(path: PathBuf, file: Option) -> Self { + Self { path, file } + } + + pub fn into_parts(self) -> (PathBuf, Option) { + (self.path, self.file) + } + pub fn dry_file() -> Result { Ok(Self { path: PathBuf::new(), file: None }) } diff --git a/crates/meilisearch-types/src/document_formats.rs b/crates/meilisearch-types/src/document_formats.rs index c6e8ad907..4820ac523 100644 --- a/crates/meilisearch-types/src/document_formats.rs +++ b/crates/meilisearch-types/src/document_formats.rs @@ -250,26 +250,25 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result { } } -/// Reads NDJSON from file and write it in NDJSON in a file checking it along the way. -pub fn read_ndjson(input: &File, output: impl io::Write) -> Result { +/// Reads NDJSON from file and checks it. +pub fn read_ndjson(input: &File) -> Result { // We memory map to be able to deserialize into a RawMap that // does not allocate when possible and only materialize the first/top level. let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? 
}; - let mut output = BufWriter::new(output); - let mut bump = Bump::with_capacity(1024 * 1024); let mut count = 0; for result in serde_json::Deserializer::from_slice(&input).into_iter() { bump.reset(); - count += 1; - result - .and_then(|raw: &RawValue| { + match result { + Ok(raw) => { // try to deserialize as a map - let map = RawMap::from_raw_value(raw, &bump)?; - to_writer(&mut output, &map) - }) - .map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?; + RawMap::from_raw_value(raw, &bump) + .map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?; + count += 1; + } + Err(e) => return Err(DocumentFormatError::from((PayloadType::Ndjson, e))), + } } Ok(count) diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs index 47f73ef42..0b18810d7 100644 --- a/crates/meilisearch/src/routes/indexes/documents.rs +++ b/crates/meilisearch/src/routes/indexes/documents.rs @@ -1,5 +1,5 @@ use std::collections::HashSet; -use std::io::ErrorKind; +use std::io::{ErrorKind, Seek as _}; use std::marker::PhantomData; use actix_web::http::header::CONTENT_TYPE; @@ -572,7 +572,7 @@ async fn document_addition( index_uid: IndexUid, primary_key: Option, csv_delimiter: Option, - mut body: Payload, + body: Payload, method: IndexDocumentsMethod, task_id: Option, dry_run: bool, @@ -609,54 +609,54 @@ async fn document_addition( }; let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?; + let documents_count = match format { + PayloadType::Ndjson => { + let (path, file) = update_file.into_parts(); + let file = match file { + Some(file) => { + let (file, path) = file.into_parts(); + let mut file = copy_body_to_file(file, body, format).await?; + file.rewind().map_err(|e| { + index_scheduler::Error::FileStore(file_store::Error::IoError(e)) + })?; + Some(tempfile::NamedTempFile::from_parts(file, path)) + } + None => None, + }; - let temp_file = match tempfile() { - Ok(file) => file, - Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))), + let documents_count = file + .as_ref() + .map_or(Ok(0), |ntf| read_ndjson(ntf.as_file())) + .map_err(|e| MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))); + let update_file = file_store::File::from_parts(path, file); + update_file.persist()?; + Ok(documents_count) + } + PayloadType::Json | PayloadType::Csv { delimiter: _ } => { + let temp_file = match tempfile() { + Ok(file) => file, + Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))), + }; + + let read_file = copy_body_to_file(temp_file, body, format).await?; + tokio::task::spawn_blocking(move || { + let documents_count = match format { + PayloadType::Json => read_json(&read_file, &mut update_file)?, + PayloadType::Csv { delimiter } => { + read_csv(&read_file, &mut update_file, delimiter)? + } + PayloadType::Ndjson => { + unreachable!("We already wrote the user content into the update file") + } + }; + // we NEED to persist the file here because we moved the `udpate_file` in another task. 
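// Aside on the `read_ndjson` rewrite above: since the NDJSON payload is now
// streamed straight into the update file, the function no longer copies the
// documents anywhere; it only walks the memory-mapped bytes and checks that
// every entry is a JSON object. A standalone sketch of that validate-only
// pass, with the error type simplified to `serde_json::Error` (an assumption
// of this sketch; the real function wraps errors in `DocumentFormatError`).
// A later commit in this series swaps the map construction to
// `from_raw_value_and_hasher(.., FxBuildHasher, ..)`.
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use serde_json::value::RawValue;

fn count_and_validate_ndjson(input: &[u8]) -> Result<u64, serde_json::Error> {
    let mut bump = Bump::with_capacity(1024 * 1024);
    let mut count = 0;
    for result in serde_json::Deserializer::from_slice(input).into_iter::<&RawValue>() {
        bump.reset();
        // Building the bump-backed RawMap fails on anything that is not a
        // JSON object, which is the only check needed before the file is
        // persisted and the task is registered.
        RawMap::from_raw_value(result?, &bump)?;
        count += 1;
    }
    Ok(count)
}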
+ update_file.persist()?; + Ok(documents_count) + }) + .await + } }; - let async_file = File::from_std(temp_file); - let mut buffer = BufWriter::new(async_file); - - let mut buffer_write_size: usize = 0; - while let Some(result) = body.next().await { - let byte = result?; - - if byte.is_empty() && buffer_write_size == 0 { - return Err(MeilisearchHttpError::MissingPayload(format)); - } - - match buffer.write_all(&byte).await { - Ok(()) => buffer_write_size += 1, - Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))), - } - } - - if let Err(e) = buffer.flush().await { - return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))); - } - - if buffer_write_size == 0 { - return Err(MeilisearchHttpError::MissingPayload(format)); - } - - if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await { - return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))); - } - - let read_file = buffer.into_inner().into_std().await; - let documents_count = tokio::task::spawn_blocking(move || { - let documents_count = match format { - PayloadType::Json => read_json(&read_file, &mut update_file)?, - PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?, - PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?, - }; - // we NEED to persist the file here because we moved the `udpate_file` in another task. - update_file.persist()?; - Ok(documents_count) - }) - .await; - let documents_count = match documents_count { Ok(Ok(documents_count)) => documents_count, // in this case the file has not possibly be persisted. @@ -703,6 +703,39 @@ async fn document_addition( Ok(task.into()) } +async fn copy_body_to_file( + output: std::fs::File, + mut body: Payload, + format: PayloadType, +) -> Result { + let async_file = File::from_std(output); + let mut buffer = BufWriter::new(async_file); + let mut buffer_write_size: usize = 0; + while let Some(result) = body.next().await { + let byte = result?; + + if byte.is_empty() && buffer_write_size == 0 { + return Err(MeilisearchHttpError::MissingPayload(format)); + } + + match buffer.write_all(&byte).await { + Ok(()) => buffer_write_size += 1, + Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))), + } + } + if let Err(e) = buffer.flush().await { + return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))); + } + if buffer_write_size == 0 { + return Err(MeilisearchHttpError::MissingPayload(format)); + } + if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await { + return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))); + } + let read_file = buffer.into_inner().into_std().await; + Ok(read_file) +} + pub async fn delete_documents_batch( index_scheduler: GuardedData, Data>, index_uid: web::Path, From 69c931334fc6387b2ee92b6016762b5dac898be5 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 11 Dec 2024 10:08:49 +0100 Subject: [PATCH 122/158] Fix the error messages categorization with invalid NDJson --- crates/meilisearch/src/routes/indexes/documents.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs index 0b18810d7..264365704 100644 --- a/crates/meilisearch/src/routes/indexes/documents.rs +++ b/crates/meilisearch/src/routes/indexes/documents.rs @@ -624,13 +624,12 @@ async fn document_addition( None => None, }; - let documents_count = file - .as_ref() - .map_or(Ok(0), |ntf| read_ndjson(ntf.as_file())) 
- .map_err(|e| MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))); + let documents_count = file.as_ref().map_or(Ok(0), |ntf| { + read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat) + })?; let update_file = file_store::File::from_parts(path, file); update_file.persist()?; - Ok(documents_count) + Ok(Ok(documents_count)) } PayloadType::Json | PayloadType::Csv { delimiter: _ } => { let temp_file = match tempfile() { From 93fbdc06d3098694e0ce0e21ebde91c8bf92c4d3 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 11 Dec 2024 12:02:38 +0100 Subject: [PATCH 123/158] Use a nonrandom hasher when decoding NDJSON --- Cargo.lock | 9 +++++---- crates/meilisearch-types/Cargo.toml | 1 + crates/meilisearch-types/src/document_formats.rs | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9476506ec..349bed5db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3577,6 +3577,7 @@ dependencies = [ "memmap2", "milli", "roaring", + "rustc-hash 2.1.0", "serde", "serde-cs", "serde_json", @@ -3676,7 +3677,7 @@ dependencies = [ "rhai", "roaring", "rstar", - "rustc-hash 2.0.0", + "rustc-hash 2.1.0", "serde", "serde_json", "slice-group-by", @@ -4425,7 +4426,7 @@ dependencies = [ "bytes", "rand", "ring", - "rustc-hash 2.0.0", + "rustc-hash 2.1.0", "rustls", "slab", "thiserror", @@ -4798,9 +4799,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" [[package]] name = "rustc_version" diff --git a/crates/meilisearch-types/Cargo.toml b/crates/meilisearch-types/Cargo.toml index e81e6dd35..76d8d11ca 100644 --- a/crates/meilisearch-types/Cargo.toml +++ b/crates/meilisearch-types/Cargo.toml @@ -26,6 +26,7 @@ memmap2 = "0.9.4" milli = { path = "../milli" } bumparaw-collections = "0.1.2" roaring = { version = "0.10.7", features = ["serde"] } +rustc-hash = "2.1.0" serde = { version = "1.0.204", features = ["derive"] } serde-cs = "0.2.4" serde_json = "1.0.120" diff --git a/crates/meilisearch-types/src/document_formats.rs b/crates/meilisearch-types/src/document_formats.rs index 4820ac523..d858b3c17 100644 --- a/crates/meilisearch-types/src/document_formats.rs +++ b/crates/meilisearch-types/src/document_formats.rs @@ -8,6 +8,7 @@ use bumparaw_collections::RawMap; use memmap2::Mmap; use milli::documents::Error; use milli::Object; +use rustc_hash::FxBuildHasher; use serde::de::{SeqAccess, Visitor}; use serde::{Deserialize, Deserializer}; use serde_json::error::Category; @@ -263,7 +264,7 @@ pub fn read_ndjson(input: &File) -> Result { match result { Ok(raw) => { // try to deserialize as a map - RawMap::from_raw_value(raw, &bump) + RawMap::from_raw_value_and_hasher(raw, FxBuildHasher, &bump) .map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?; count += 1; } From 01bcc601beb72f0011568e164f459309530185f6 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 11 Dec 2024 12:04:29 +0100 Subject: [PATCH 124/158] Use a nonrandom hasher when decoding JSON --- crates/meilisearch-types/src/document_formats.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/meilisearch-types/src/document_formats.rs b/crates/meilisearch-types/src/document_formats.rs index d858b3c17..70a0e6204 100644 --- 
a/crates/meilisearch-types/src/document_formats.rs +++ b/crates/meilisearch-types/src/document_formats.rs @@ -221,7 +221,7 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result { let mut deserializer = serde_json::Deserializer::from_slice(&input); let res = array_each(&mut deserializer, |obj: &RawValue| { doc_alloc.reset(); - let map = RawMap::from_raw_value(obj, &doc_alloc)?; + let map = RawMap::from_raw_value_and_hasher(obj, FxBuildHasher, &doc_alloc)?; to_writer(&mut out, &map) }); let count = match res { From 5622b9607d4abd6afbf32ecba3e9e25e8eaa4131 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 11 Dec 2024 12:18:36 +0100 Subject: [PATCH 125/158] Wrap the read NDJSON pass into a tokio blocking --- .../src/routes/indexes/documents.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/crates/meilisearch/src/routes/indexes/documents.rs b/crates/meilisearch/src/routes/indexes/documents.rs index 264365704..5f79000bd 100644 --- a/crates/meilisearch/src/routes/indexes/documents.rs +++ b/crates/meilisearch/src/routes/indexes/documents.rs @@ -624,12 +624,19 @@ async fn document_addition( None => None, }; - let documents_count = file.as_ref().map_or(Ok(0), |ntf| { - read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat) - })?; - let update_file = file_store::File::from_parts(path, file); - update_file.persist()?; - Ok(Ok(documents_count)) + let documents_count = tokio::task::spawn_blocking(move || { + let documents_count = file.as_ref().map_or(Ok(0), |ntf| { + read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat) + })?; + + let update_file = file_store::File::from_parts(path, file); + update_file.persist()?; + + Ok(documents_count) + }) + .await?; + + Ok(documents_count) } PayloadType::Json | PayloadType::Csv { delimiter: _ } => { let temp_file = match tempfile() { From 5c492031d9155139191e1b175259db86f7aead06 Mon Sep 17 00:00:00 2001 From: Many the fish Date: Wed, 11 Dec 2024 14:34:18 +0100 Subject: [PATCH 126/158] Update crates/meilitool/src/upgrade/v1_12.rs Co-authored-by: Louis Dureuil --- crates/meilitool/src/upgrade/v1_12.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/meilitool/src/upgrade/v1_12.rs b/crates/meilitool/src/upgrade/v1_12.rs index 77060d90d..85fb41472 100644 --- a/crates/meilitool/src/upgrade/v1_12.rs +++ b/crates/meilitool/src/upgrade/v1_12.rs @@ -69,6 +69,7 @@ fn convert_update_files(db_path: &Path) -> anyhow::Result<()> { "while flushing update file bufwriter for update file {update_file_path:?}" ))?; let _ = file + // atomically replace the obkv file with the rewritten NDJSON file .persist(&update_file_path) .with_context(|| format!("while persisting update file {update_file_path:?}"))?; } From 04a62d2b97e6333645e6b1ba898bb02efdb11877 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 11 Dec 2024 14:57:07 +0100 Subject: [PATCH 127/158] Compile Meilisearch or run the dedicated binary file --- crates/xtask/src/bench/meili_process.rs | 11 +---------- crates/xtask/src/bench/mod.rs | 5 +++++ crates/xtask/src/bench/workload.rs | 25 ++++++++++++++++++++++--- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/crates/xtask/src/bench/meili_process.rs b/crates/xtask/src/bench/meili_process.rs index 99f6f4ea6..db787e595 100644 --- a/crates/xtask/src/bench/meili_process.rs +++ b/crates/xtask/src/bench/meili_process.rs @@ -37,17 +37,8 @@ pub async fn start( master_key: Option<&str>, workload: &Workload, asset_folder: &str, + mut command: tokio::process::Command, ) -> 
anyhow::Result { - let mut command = tokio::process::Command::new("cargo"); - command - .arg("run") - .arg("--release") - .arg("-p") - .arg("meilisearch") - .arg("--bin") - .arg("meilisearch") - .arg("--"); - command.arg("--db-path").arg("./_xtask_benchmark.ms"); if let Some(master_key) = master_key { command.arg("--master-key").arg(master_key); diff --git a/crates/xtask/src/bench/mod.rs b/crates/xtask/src/bench/mod.rs index deec120fa..491dc33ab 100644 --- a/crates/xtask/src/bench/mod.rs +++ b/crates/xtask/src/bench/mod.rs @@ -86,6 +86,10 @@ pub struct BenchDeriveArgs { /// The maximum time in seconds we allow for fetching the task queue before timing out. #[arg(long, default_value_t = 60)] tasks_queue_timeout_secs: u64, + + /// The path to the binary to run. By default it compiles the binary with cargo. + #[arg(long)] + binary_path: Option, } pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { @@ -170,6 +174,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> { args.master_key.as_deref(), workload, &args, + args.binary_path.as_deref(), ) .await?; diff --git a/crates/xtask/src/bench/workload.rs b/crates/xtask/src/bench/workload.rs index 19c8bfae8..649bd0eaf 100644 --- a/crates/xtask/src/bench/workload.rs +++ b/crates/xtask/src/bench/workload.rs @@ -1,6 +1,7 @@ use std::collections::BTreeMap; use std::fs::File; use std::io::{Seek as _, Write as _}; +use std::path::Path; use anyhow::{bail, Context as _}; use futures_util::TryStreamExt as _; @@ -85,14 +86,30 @@ pub async fn execute( master_key: Option<&str>, workload: Workload, args: &BenchDeriveArgs, + binary_path: Option<&Path>, ) -> anyhow::Result<()> { assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?; let workload_uuid = dashboard_client.create_workload(invocation_uuid, &workload).await?; let mut tasks = Vec::new(); - for i in 0..workload.run_count { + let run_command = match binary_path { + Some(binary_path) => tokio::process::Command::new(binary_path), + None => { + let mut command = tokio::process::Command::new("cargo"); + command + .arg("run") + .arg("--release") + .arg("-p") + .arg("meilisearch") + .arg("--bin") + .arg("meilisearch") + .arg("--"); + command + } + }; + tasks.push( execute_run( dashboard_client, @@ -102,6 +119,7 @@ pub async fn execute( master_key, &workload, args, + run_command, i, ) .await?, @@ -109,7 +127,6 @@ pub async fn execute( } let mut reports = Vec::with_capacity(workload.run_count as usize); - for task in tasks { reports.push( task.await @@ -133,13 +150,15 @@ async fn execute_run( master_key: Option<&str>, workload: &Workload, args: &BenchDeriveArgs, + run_command: tokio::process::Command, run_number: u16, ) -> anyhow::Result>> { meili_process::delete_db(); meili_process::build().await?; let meilisearch = - meili_process::start(meili_client, master_key, workload, &args.asset_folder).await?; + meili_process::start(meili_client, master_key, workload, &args.asset_folder, run_command) + .await?; let processor = run_commands( dashboard_client, From bfca54cc2cd5cc65d54dfeb7aa9b58103d0464b7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Wed, 11 Dec 2024 15:26:18 +0100 Subject: [PATCH 128/158] Return docid in case of errors while rendering the document template --- crates/milli/src/prompt/error.rs | 12 ++++++++++++ crates/milli/src/prompt/mod.rs | 10 +++++++--- crates/milli/src/update/new/extract/vectors/mod.rs | 6 ++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/crates/milli/src/prompt/error.rs b/crates/milli/src/prompt/error.rs index 
8a762b60a..a92e2fdc3 100644 --- a/crates/milli/src/prompt/error.rs +++ b/crates/milli/src/prompt/error.rs @@ -38,6 +38,16 @@ pub struct RenderPromptError { pub fault: FaultSource, } impl RenderPromptError { + pub(crate) fn missing_context_with_external_docid( + external_docid: String, + inner: liquid::Error, + ) -> RenderPromptError { + Self { + kind: RenderPromptErrorKind::MissingContextWithExternalDocid(external_docid, inner), + fault: FaultSource::User, + } + } + pub(crate) fn missing_context(inner: liquid::Error) -> RenderPromptError { Self { kind: RenderPromptErrorKind::MissingContext(inner), fault: FaultSource::User } } @@ -47,6 +57,8 @@ impl RenderPromptError { pub enum RenderPromptErrorKind { #[error("missing field in document: {0}")] MissingContext(liquid::Error), + #[error("missing field in document `{0}`: {1}")] + MissingContextWithExternalDocid(String, liquid::Error), } impl From for crate::Error { diff --git a/crates/milli/src/prompt/mod.rs b/crates/milli/src/prompt/mod.rs index bbcf054e6..3eb91611e 100644 --- a/crates/milli/src/prompt/mod.rs +++ b/crates/milli/src/prompt/mod.rs @@ -119,6 +119,7 @@ impl Prompt { 'doc: 'a, // lifetime of the allocator, will live for an entire chunk of documents >( &self, + external_docid: &str, document: impl crate::update::new::document::Document<'a> + Debug, field_id_map: &RefCell, doc_alloc: &'doc Bump, @@ -130,9 +131,12 @@ impl Prompt { self.max_bytes.unwrap_or_else(default_max_bytes).get(), doc_alloc, ); - self.template - .render_to(&mut rendered, &context) - .map_err(RenderPromptError::missing_context)?; + self.template.render_to(&mut rendered, &context).map_err(|liquid_error| { + RenderPromptError::missing_context_with_external_docid( + external_docid.to_owned(), + liquid_error, + ) + })?; Ok(std::str::from_utf8(rendered.into_bump_slice()) .expect("render can only write UTF-8 because all inputs and processing preserve utf-8")) } diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index 1110432fa..2a72a1650 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -130,6 +130,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> { ); } else if new_vectors.regenerate { let new_rendered = prompt.render_document( + update.external_document_id(), update.current( &context.rtxn, context.index, @@ -139,6 +140,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> { &context.doc_alloc, )?; let old_rendered = prompt.render_document( + update.external_document_id(), update.merged( &context.rtxn, context.index, @@ -158,6 +160,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> { } } else if old_vectors.regenerate { let old_rendered = prompt.render_document( + update.external_document_id(), update.current( &context.rtxn, context.index, @@ -167,6 +170,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> { &context.doc_alloc, )?; let new_rendered = prompt.render_document( + update.external_document_id(), update.merged( &context.rtxn, context.index, @@ -216,6 +220,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> { ); } else if new_vectors.regenerate { let rendered = prompt.render_document( + insertion.external_document_id(), insertion.inserted(), context.new_fields_ids_map, &context.doc_alloc, @@ -229,6 +234,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for 
EmbeddingExtractor<'a, 'b> { } } else { let rendered = prompt.render_document( + insertion.external_document_id(), insertion.inserted(), context.new_fields_ids_map, &context.doc_alloc, From eaa897d983d2c71b6f76a453f1739980ba980558 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 11 Dec 2024 15:57:16 +0100 Subject: [PATCH 129/158] Avoid compiling when unecessary --- crates/xtask/src/bench/workload.rs | 38 +++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/crates/xtask/src/bench/workload.rs b/crates/xtask/src/bench/workload.rs index 649bd0eaf..39119428f 100644 --- a/crates/xtask/src/bench/workload.rs +++ b/crates/xtask/src/bench/workload.rs @@ -94,22 +94,6 @@ pub async fn execute( let mut tasks = Vec::new(); for i in 0..workload.run_count { - let run_command = match binary_path { - Some(binary_path) => tokio::process::Command::new(binary_path), - None => { - let mut command = tokio::process::Command::new("cargo"); - command - .arg("run") - .arg("--release") - .arg("-p") - .arg("meilisearch") - .arg("--bin") - .arg("meilisearch") - .arg("--"); - command - } - }; - tasks.push( execute_run( dashboard_client, @@ -119,7 +103,7 @@ pub async fn execute( master_key, &workload, args, - run_command, + binary_path, i, ) .await?, @@ -150,12 +134,28 @@ async fn execute_run( master_key: Option<&str>, workload: &Workload, args: &BenchDeriveArgs, - run_command: tokio::process::Command, + binary_path: Option<&Path>, run_number: u16, ) -> anyhow::Result>> { meili_process::delete_db(); - meili_process::build().await?; + let run_command = match binary_path { + Some(binary_path) => tokio::process::Command::new(binary_path), + None => { + meili_process::build().await?; + let mut command = tokio::process::Command::new("cargo"); + command + .arg("run") + .arg("--release") + .arg("-p") + .arg("meilisearch") + .arg("--bin") + .arg("meilisearch") + .arg("--"); + command + } + }; + let meilisearch = meili_process::start(meili_client, master_key, workload, &args.asset_folder, run_command) .await?; From df9b68f8ed965f6d37a3c186ba7e4255a5640dfe Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 10 Dec 2024 16:30:48 +0100 Subject: [PATCH 130/158] inital implementation of the progress --- crates/benchmarks/benches/indexing.rs | 123 +++++------ crates/benchmarks/benches/utils.rs | 5 +- crates/fuzzers/src/bin/fuzz-indexing.rs | 5 +- crates/index-scheduler/src/batch.rs | 63 ++---- crates/index-scheduler/src/insta_snapshot.rs | 2 +- crates/index-scheduler/src/lib.rs | 74 ++----- crates/index-scheduler/src/processing.rs | 205 ++++++++++++++++++ crates/index-scheduler/src/utils.rs | 7 +- crates/meilisearch-types/src/batch_view.rs | 3 + crates/meilisearch-types/src/batches.rs | 3 + crates/meilisearch-types/src/tasks.rs | 57 ----- crates/milli/src/index.rs | 13 +- crates/milli/src/lib.rs | 1 + crates/milli/src/progress.rs | 116 ++++++++++ .../milli/src/search/new/tests/integration.rs | 5 +- .../milli/src/update/index_documents/mod.rs | 47 ++-- .../new/extract/faceted/extract_facets.rs | 20 +- crates/milli/src/update/new/extract/mod.rs | 13 +- .../extract/searchable/extract_word_docids.rs | 20 +- .../src/update/new/extract/searchable/mod.rs | 18 +- .../update/new/indexer/document_changes.rs | 53 +---- .../update/new/indexer/document_deletion.rs | 7 +- .../update/new/indexer/document_operation.rs | 29 ++- crates/milli/src/update/new/indexer/mod.rs | 43 ++-- crates/milli/src/update/new/steps.rs | 47 ++-- .../milli/tests/search/facet_distribution.rs | 5 +- crates/milli/tests/search/mod.rs | 5 
+- crates/milli/tests/search/query_criteria.rs | 5 +- crates/milli/tests/search/typo_tolerance.rs | 5 +- 29 files changed, 585 insertions(+), 414 deletions(-) create mode 100644 crates/index-scheduler/src/processing.rs create mode 100644 crates/milli/src/progress.rs diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index 870e56686..4acd7b22a 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -8,6 +8,7 @@ use bumpalo::Bump; use criterion::{criterion_group, criterion_main, Criterion}; use milli::documents::PrimaryKey; use milli::heed::{EnvOpenOptions, RwTxn}; +use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; @@ -151,7 +152,7 @@ fn indexing_songs_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -166,7 +167,7 @@ fn indexing_songs_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -218,7 +219,7 @@ fn reindexing_songs_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -233,7 +234,7 @@ fn reindexing_songs_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -263,7 +264,7 @@ fn reindexing_songs_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -278,7 +279,7 @@ fn reindexing_songs_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -332,7 +333,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -347,7 +348,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -409,7 +410,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -424,7 +425,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -454,7 +455,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -469,7 +470,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -495,7 +496,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -510,7 +511,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -563,7 +564,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -578,7 +579,7 @@ fn 
indexing_songs_without_faceted_numbers(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -630,7 +631,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -645,7 +646,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -697,7 +698,7 @@ fn indexing_wiki(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -712,7 +713,7 @@ fn indexing_wiki(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -763,7 +764,7 @@ fn reindexing_wiki(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -778,7 +779,7 @@ fn reindexing_wiki(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -808,7 +809,7 @@ fn reindexing_wiki(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -823,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -876,7 +877,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -891,7 +892,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -953,7 +954,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -968,7 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -999,7 +1000,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -1014,7 +1015,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -1041,7 +1042,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -1056,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -1108,7 +1109,7 @@ fn indexing_movies_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -1123,7 +1124,7 @@ fn indexing_movies_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -1174,7 +1175,7 @@ fn reindexing_movies_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -1189,7 +1190,7 @@ fn reindexing_movies_default(c: &mut Criterion) { &document_changes, 
EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -1219,7 +1220,7 @@ fn reindexing_movies_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -1234,7 +1235,7 @@ fn reindexing_movies_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -1287,7 +1288,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -1302,7 +1303,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); @@ -1350,7 +1351,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec Index { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -125,7 +126,7 @@ pub fn base_setup(conf: &Conf) -> Index { &document_changes, EmbeddingConfigs::default(), &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); diff --git a/crates/fuzzers/src/bin/fuzz-indexing.rs b/crates/fuzzers/src/bin/fuzz-indexing.rs index ee927940f..08711e5e3 100644 --- a/crates/fuzzers/src/bin/fuzz-indexing.rs +++ b/crates/fuzzers/src/bin/fuzz-indexing.rs @@ -10,6 +10,7 @@ use either::Either; use fuzzers::Operation; use milli::documents::mmap_from_objects; use milli::heed::EnvOpenOptions; +use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexDocumentsMethod, IndexerConfig}; use milli::vector::EmbeddingConfigs; @@ -128,7 +129,7 @@ fn main() { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -143,7 +144,7 @@ fn main() { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index 93e9a1404..1bfa7f53b 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -22,8 +22,6 @@ use std::ffi::OsStr; use std::fmt; use std::fs::{self, File}; use std::io::BufWriter; -use std::sync::atomic::{self, AtomicU64}; -use std::time::Duration; use bumpalo::collections::CollectIn; use bumpalo::Bump; @@ -32,6 +30,7 @@ use meilisearch_types::batches::BatchId; use meilisearch_types::heed::{RoTxn, RwTxn}; use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey}; use meilisearch_types::milli::heed::CompactionOption; +use meilisearch_types::milli::progress::Progress; use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction}; use meilisearch_types::milli::update::{ DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings, @@ -41,9 +40,7 @@ use meilisearch_types::milli::vector::parsed_vectors::{ }; use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder}; use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked}; -use meilisearch_types::tasks::{ - Details, IndexSwap, Kind, KindWithContent, Status, Task, TaskProgress, -}; +use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task}; use meilisearch_types::{compression, Index, VERSION_FILE_NAME}; use roaring::RoaringBitmap; use time::macros::format_description; @@ -561,11 +558,12 @@ impl IndexScheduler { /// The list of tasks that were processed. 
The metadata of each task in the returned /// list is updated accordingly, with the exception of the its date fields /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at). - #[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))] + #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))] pub(crate) fn process_batch( &self, batch: Batch, current_batch: &mut ProcessingBatch, + progress: Progress, ) -> Result> { #[cfg(test)] { @@ -953,7 +951,7 @@ impl IndexScheduler { .set_currently_updating_index(Some((index_uid.clone(), index.clone()))); let mut index_wtxn = index.write_txn()?; - let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?; + let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?; { let span = tracing::trace_span!(target: "indexing::scheduler", "commit"); @@ -996,6 +994,7 @@ impl IndexScheduler { self.process_batch( Batch::IndexUpdate { index_uid, primary_key, task }, current_batch, + progress, ) } Batch::IndexUpdate { index_uid, primary_key, mut task } => { @@ -1168,7 +1167,7 @@ impl IndexScheduler { /// The list of processed tasks. #[tracing::instrument( level = "trace", - skip(self, index_wtxn, index), + skip(self, index_wtxn, index, progress), target = "indexing::scheduler" )] fn apply_index_operation<'i>( @@ -1176,44 +1175,12 @@ impl IndexScheduler { index_wtxn: &mut RwTxn<'i>, index: &'i Index, operation: IndexOperation, + progress: Progress, ) -> Result> { let indexer_alloc = Bump::new(); let started_processing_at = std::time::Instant::now(); - let secs_since_started_processing_at = AtomicU64::new(0); - const PRINT_SECS_DELTA: u64 = 5; - - let processing_tasks = self.processing_tasks.clone(); let must_stop_processing = self.must_stop_processing.clone(); - let send_progress = |progress| { - let now = std::time::Instant::now(); - let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed); - let previous = started_processing_at + Duration::from_secs(elapsed); - let elapsed = now - previous; - - if elapsed.as_secs() < PRINT_SECS_DELTA { - return; - } - - secs_since_started_processing_at - .store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed); - - let TaskProgress { - current_step, - finished_steps, - total_steps, - finished_substeps, - total_substeps, - } = processing_tasks.write().unwrap().update_progress(progress); - - tracing::info!( - current_step, - finished_steps, - total_steps, - finished_substeps, - total_substeps - ); - }; match operation { IndexOperation::DocumentClear { index_uid, mut tasks } => { @@ -1308,7 +1275,7 @@ impl IndexScheduler { primary_key.as_deref(), &mut new_fields_ids_map, &|| must_stop_processing.get(), - &send_progress, + progress.clone(), ) .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; @@ -1356,7 +1323,7 @@ impl IndexScheduler { &document_changes, embedders, &|| must_stop_processing.get(), - &send_progress, + &progress, ) .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; @@ -1470,7 +1437,7 @@ impl IndexScheduler { &document_changes, embedders, &|| must_stop_processing.get(), - &send_progress, + &progress, ) .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; @@ -1621,7 +1588,7 @@ impl IndexScheduler { &document_changes, embedders, &|| must_stop_processing.get(), - &send_progress, + &progress, ) .map_err(|err| 
Error::from_milli(err, Some(index_uid.clone())))?; @@ -1673,12 +1640,14 @@ impl IndexScheduler { index_uid: index_uid.clone(), tasks: cleared_tasks, }, + progress.clone(), )?; let settings_tasks = self.apply_index_operation( index_wtxn, index, IndexOperation::Settings { index_uid, settings, tasks: settings_tasks }, + progress, )?; let mut tasks = settings_tasks; @@ -1702,8 +1671,8 @@ impl IndexScheduler { let all_task_ids = self.all_task_ids(wtxn)?; let mut to_delete_tasks = all_task_ids & matched_tasks; - to_delete_tasks -= processing_tasks; - to_delete_tasks -= enqueued_tasks; + to_delete_tasks -= &**processing_tasks; + to_delete_tasks -= &enqueued_tasks; // 2. We now have a list of tasks to delete, delete them diff --git a/crates/index-scheduler/src/insta_snapshot.rs b/crates/index-scheduler/src/insta_snapshot.rs index bcd5966b5..67627d8c1 100644 --- a/crates/index-scheduler/src/insta_snapshot.rs +++ b/crates/index-scheduler/src/insta_snapshot.rs @@ -353,7 +353,7 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database String { let mut snap = String::new(); - let Batch { uid, details, stats, started_at, finished_at } = batch; + let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch; if let Some(finished_at) = finished_at { assert!(finished_at > started_at); } diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index e780b21a1..f5f73087d 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -26,6 +26,7 @@ mod index_mapper; #[cfg(test)] mod insta_snapshot; mod lru; +mod processing; mod utils; pub mod uuid_codec; @@ -56,12 +57,12 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128}; use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn}; use meilisearch_types::milli::documents::DocumentsBatchBuilder; use meilisearch_types::milli::index::IndexEmbeddingConfig; -use meilisearch_types::milli::update::new::indexer::document_changes::Progress; use meilisearch_types::milli::update::IndexerConfig; use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs}; use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32}; use meilisearch_types::task_view::TaskView; -use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress}; +use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; +use processing::ProcessingTasks; use rayon::current_num_threads; use rayon::prelude::{IntoParallelIterator, ParallelIterator}; use roaring::RoaringBitmap; @@ -72,7 +73,8 @@ use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map use uuid::Uuid; use crate::index_mapper::IndexMapper; -use crate::utils::{check_index_swap_validity, clamp_to_page_size, ProcessingBatch}; +use crate::processing::{AtomicTaskStep, BatchProgress}; +use crate::utils::{check_index_swap_validity, clamp_to_page_size}; pub(crate) type BEI128 = I128; @@ -163,48 +165,6 @@ impl Query { } } -#[derive(Debug, Clone)] -pub struct ProcessingTasks { - batch: Option, - /// The list of tasks ids that are currently running. - processing: RoaringBitmap, - /// The progress on processing tasks - progress: Option, -} - -impl ProcessingTasks { - /// Creates an empty `ProcessingAt` struct. - fn new() -> ProcessingTasks { - ProcessingTasks { batch: None, processing: RoaringBitmap::new(), progress: None } - } - - /// Stores the currently processing tasks, and the date time at which it started. 
- fn start_processing(&mut self, processing_batch: ProcessingBatch, processing: RoaringBitmap) { - self.batch = Some(processing_batch); - self.processing = processing; - } - - fn update_progress(&mut self, progress: Progress) -> TaskProgress { - self.progress.get_or_insert_with(TaskProgress::default).update(progress) - } - - /// Set the processing tasks to an empty list - fn stop_processing(&mut self) -> Self { - self.progress = None; - - Self { - batch: std::mem::take(&mut self.batch), - processing: std::mem::take(&mut self.processing), - progress: None, - } - } - - /// Returns `true` if there, at least, is one task that is currently processing that we must stop. - fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool { - !self.processing.is_disjoint(canceled_tasks) - } -} - #[derive(Default, Clone, Debug)] struct MustStopProcessing(Arc); @@ -813,7 +773,7 @@ impl IndexScheduler { let mut batch_tasks = RoaringBitmap::new(); for batch_uid in batch_uids { if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) { - batch_tasks |= &processing_tasks; + batch_tasks |= &*processing_tasks; } else { batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?; } @@ -827,13 +787,13 @@ impl IndexScheduler { match status { // special case for Processing tasks Status::Processing => { - status_tasks |= &processing_tasks; + status_tasks |= &*processing_tasks; } status => status_tasks |= &self.get_status(rtxn, *status)?, }; } if !status.contains(&Status::Processing) { - tasks -= &processing_tasks; + tasks -= &*processing_tasks; } tasks &= status_tasks; } @@ -882,7 +842,7 @@ impl IndexScheduler { // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`. tasks = { let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) = - (&tasks - &processing_tasks, &tasks & &processing_tasks); + (&tasks - &*processing_tasks, &tasks & &*processing_tasks); // special case for Processing tasks // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds @@ -1090,7 +1050,7 @@ impl IndexScheduler { // Once we have filtered the two subsets, we put them back together and assign it back to `batches`. 
batches = { let (mut filtered_non_processing_batches, mut filtered_processing_batches) = - (&batches - &processing.processing, &batches & &processing.processing); + (&batches - &*processing.processing, &batches & &*processing.processing); // special case for Processing batches // A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds @@ -1606,7 +1566,8 @@ impl IndexScheduler { // We reset the must_stop flag to be sure that we don't stop processing tasks self.must_stop_processing.reset(); - self.processing_tasks + let progress = self + .processing_tasks .write() .unwrap() // We can clone the processing batch here because we don't want its modification to affect the view of the processing batches @@ -1619,11 +1580,12 @@ impl IndexScheduler { let res = { let cloned_index_scheduler = self.private_clone(); let processing_batch = &mut processing_batch; + let progress = progress.clone(); std::thread::scope(|s| { let handle = std::thread::Builder::new() .name(String::from("batch-operation")) .spawn_scoped(s, move || { - cloned_index_scheduler.process_batch(batch, processing_batch) + cloned_index_scheduler.process_batch(batch, processing_batch, progress) }) .unwrap(); handle.join().unwrap_or(Err(Error::ProcessBatchPanicked)) @@ -1636,6 +1598,7 @@ impl IndexScheduler { #[cfg(test)] self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?; + progress.update_progress(BatchProgress::WritingTasksToDisk); processing_batch.finished(); let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?; let mut canceled = RoaringBitmap::new(); @@ -1645,12 +1608,15 @@ impl IndexScheduler { #[cfg(test)] self.breakpoint(Breakpoint::ProcessBatchSucceeded); + let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32); + progress.update_progress(task_progress_obj); let mut success = 0; let mut failure = 0; let mut canceled_by = None; #[allow(unused_variables)] for (i, mut task) in tasks.into_iter().enumerate() { + task_progress.fetch_add(1, Ordering::Relaxed); processing_batch.update(&mut task); if task.status == Status::Canceled { canceled.insert(task.uid); @@ -1718,8 +1684,12 @@ impl IndexScheduler { Err(err) => { #[cfg(test)] self.breakpoint(Breakpoint::ProcessBatchFailed); + let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32); + progress.update_progress(task_progress_obj); + let error: ResponseError = err.into(); for id in ids.iter() { + task_progress.fetch_add(1, Ordering::Relaxed); let mut task = self .get_task(&wtxn, id) .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))? diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs new file mode 100644 index 000000000..e5e892927 --- /dev/null +++ b/crates/index-scheduler/src/processing.rs @@ -0,0 +1,205 @@ +use crate::utils::ProcessingBatch; +use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step}; +use roaring::RoaringBitmap; +use std::{borrow::Cow, sync::Arc}; + +#[derive(Clone)] +pub struct ProcessingTasks { + pub batch: Option>, + /// The list of tasks ids that are currently running. + pub processing: Arc, + /// The progress on processing tasks + pub progress: Option, +} + +impl ProcessingTasks { + /// Creates an empty `ProcessingAt` struct. 
+ pub fn new() -> ProcessingTasks { + ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None } + } + + pub fn get_progress_view(&self) -> Option { + Some(self.progress.as_ref()?.as_progress_view()) + } + + /// Stores the currently processing tasks, and the date time at which it started. + pub fn start_processing( + &mut self, + processing_batch: ProcessingBatch, + processing: RoaringBitmap, + ) -> Progress { + self.batch = Some(Arc::new(processing_batch)); + self.processing = Arc::new(processing); + let progress = Progress::default(); + progress.update_progress(BatchProgress::ProcessingTasks); + self.progress = Some(progress.clone()); + + progress + } + + /// Set the processing tasks to an empty list + pub fn stop_processing(&mut self) -> Self { + self.progress = None; + + Self { + batch: std::mem::take(&mut self.batch), + processing: std::mem::take(&mut self.processing), + progress: None, + } + } + + /// Returns `true` if there, at least, is one task that is currently processing that we must stop. + pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool { + !self.processing.is_disjoint(canceled_tasks) + } +} + +#[repr(u8)] +#[derive(Copy, Clone)] +pub enum BatchProgress { + ProcessingTasks, + WritingTasksToDisk, +} + +impl Step for BatchProgress { + fn name(&self) -> Cow<'static, str> { + match self { + BatchProgress::ProcessingTasks => Cow::Borrowed("processing tasks"), + BatchProgress::WritingTasksToDisk => Cow::Borrowed("writing tasks to disk"), + } + } + + fn current(&self) -> u32 { + *self as u8 as u32 + } + + fn total(&self) -> u32 { + 2 + } +} + +#[derive(Default)] +pub struct Task {} + +impl NamedStep for Task { + fn name(&self) -> &'static str { + "task" + } +} +pub type AtomicTaskStep = AtomicSubStep; + +#[cfg(test)] +mod test { + use std::sync::atomic::Ordering; + + use meili_snap::{json_string, snapshot}; + + use super::*; + + #[test] + fn one_level() { + let mut processing = ProcessingTasks::new(); + processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new()); + snapshot!(json_string!(processing.get_progress_view()), @r#" + { + "steps": [ + { + "name": "processing tasks", + "finished": 0, + "total": 2 + } + ], + "percentage": 0.0 + } + "#); + processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk); + snapshot!(json_string!(processing.get_progress_view()), @r#" + { + "steps": [ + { + "name": "writing tasks to disk", + "finished": 1, + "total": 2 + } + ], + "percentage": 50.0 + } + "#); + } + + #[test] + fn task_progress() { + let mut processing = ProcessingTasks::new(); + processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new()); + let (atomic, tasks) = AtomicTaskStep::new(10); + processing.progress.as_ref().unwrap().update_progress(tasks); + snapshot!(json_string!(processing.get_progress_view()), @r#" + { + "steps": [ + { + "name": "processing tasks", + "finished": 0, + "total": 2 + }, + { + "name": "task", + "finished": 0, + "total": 10 + } + ], + "percentage": 0.0 + } + "#); + atomic.fetch_add(6, Ordering::Relaxed); + snapshot!(json_string!(processing.get_progress_view()), @r#" + { + "steps": [ + { + "name": "processing tasks", + "finished": 0, + "total": 2 + }, + { + "name": "task", + "finished": 6, + "total": 10 + } + ], + "percentage": 30.000002 + } + "#); + processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk); + snapshot!(json_string!(processing.get_progress_view()), @r#" + { + "steps": [ + { + "name": 
"writing tasks to disk", + "finished": 1, + "total": 2 + } + ], + "percentage": 50.0 + } + "#); + let (atomic, tasks) = AtomicTaskStep::new(5); + processing.progress.as_ref().unwrap().update_progress(tasks); + atomic.fetch_add(4, Ordering::Relaxed); + snapshot!(json_string!(processing.get_progress_view()), @r#" + { + "steps": [ + { + "name": "writing tasks to disk", + "finished": 1, + "total": 2 + }, + { + "name": "task", + "finished": 4, + "total": 5 + } + ], + "percentage": 90.0 + } + "#); + } +} diff --git a/crates/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs index 356d77b35..3718c69ca 100644 --- a/crates/index-scheduler/src/utils.rs +++ b/crates/index-scheduler/src/utils.rs @@ -134,6 +134,7 @@ impl ProcessingBatch { pub fn to_batch(&self) -> Batch { Batch { uid: self.uid, + progress: None, details: self.details.clone(), stats: self.stats.clone(), started_at: self.started_at, @@ -187,6 +188,7 @@ impl IndexScheduler { &batch.uid, &Batch { uid: batch.uid, + progress: None, details: batch.details, stats: batch.stats, started_at: batch.started_at, @@ -273,7 +275,10 @@ impl IndexScheduler { .into_iter() .map(|batch_id| { if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) { - Ok(processing.batch.as_ref().unwrap().to_batch()) + let mut batch = processing.batch.as_ref().unwrap().to_batch(); + println!("here with progress: {}", processing.progress.is_some()); + batch.progress = processing.get_progress_view(); + Ok(batch) } else { self.get_batch(rtxn, batch_id) .and_then(|task| task.ok_or(Error::CorruptedTaskQueue)) diff --git a/crates/meilisearch-types/src/batch_view.rs b/crates/meilisearch-types/src/batch_view.rs index 5d800d897..a3d7f834f 100644 --- a/crates/meilisearch-types/src/batch_view.rs +++ b/crates/meilisearch-types/src/batch_view.rs @@ -1,3 +1,4 @@ +use milli::progress::ProgressView; use serde::Serialize; use time::{Duration, OffsetDateTime}; @@ -11,6 +12,7 @@ use crate::{ #[serde(rename_all = "camelCase")] pub struct BatchView { pub uid: BatchId, + pub progress: Option, pub details: DetailsView, pub stats: BatchStats, #[serde(serialize_with = "serialize_duration", default)] @@ -25,6 +27,7 @@ impl BatchView { pub fn from_batch(batch: &Batch) -> Self { Self { uid: batch.uid, + progress: batch.progress.clone(), details: batch.details.clone(), stats: batch.stats.clone(), duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at), diff --git a/crates/meilisearch-types/src/batches.rs b/crates/meilisearch-types/src/batches.rs index a60386e52..57c609320 100644 --- a/crates/meilisearch-types/src/batches.rs +++ b/crates/meilisearch-types/src/batches.rs @@ -1,5 +1,6 @@ use std::collections::BTreeMap; +use milli::progress::ProgressView; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; @@ -15,6 +16,8 @@ pub type BatchId = u32; pub struct Batch { pub uid: BatchId, + #[serde(skip_deserializing)] + pub progress: Option, pub details: DetailsView, pub stats: BatchStats, diff --git a/crates/meilisearch-types/src/tasks.rs b/crates/meilisearch-types/src/tasks.rs index ebd28f526..c62f550ae 100644 --- a/crates/meilisearch-types/src/tasks.rs +++ b/crates/meilisearch-types/src/tasks.rs @@ -4,7 +4,6 @@ use std::fmt::{Display, Write}; use std::str::FromStr; use enum_iterator::Sequence; -use milli::update::new::indexer::document_changes::Progress; use milli::update::IndexDocumentsMethod; use milli::Object; use roaring::RoaringBitmap; @@ -41,62 +40,6 @@ pub struct Task { pub kind: KindWithContent, } -#[derive(Clone, Copy, Debug, PartialEq, 
Eq, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct TaskProgress { - pub current_step: &'static str, - pub finished_steps: u16, - pub total_steps: u16, - pub finished_substeps: Option, - pub total_substeps: Option, -} - -impl Default for TaskProgress { - fn default() -> Self { - Self::new() - } -} - -impl TaskProgress { - pub fn new() -> Self { - Self { - current_step: "start", - finished_steps: 0, - total_steps: 1, - finished_substeps: None, - total_substeps: None, - } - } - - pub fn update(&mut self, progress: Progress) -> TaskProgress { - if self.finished_steps > progress.finished_steps { - return *self; - } - - if self.current_step != progress.step_name { - self.current_step = progress.step_name - } - - self.total_steps = progress.total_steps; - - if self.finished_steps < progress.finished_steps { - self.finished_substeps = None; - self.total_substeps = None; - } - self.finished_steps = progress.finished_steps; - if let Some((finished_substeps, total_substeps)) = progress.finished_total_substep { - if let Some(task_finished_substeps) = self.finished_substeps { - if task_finished_substeps > finished_substeps { - return *self; - } - } - self.finished_substeps = Some(finished_substeps); - self.total_substeps = Some(total_substeps); - } - *self - } -} - impl Task { pub fn index_uid(&self) -> Option<&str> { use KindWithContent::*; diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 268d33cd9..f60b59c72 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1734,6 +1734,7 @@ pub(crate) mod tests { use crate::error::{Error, InternalError}; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; + use crate::progress::Progress; use crate::update::new::indexer; use crate::update::settings::InnerIndexSettings; use crate::update::{ @@ -1810,7 +1811,7 @@ pub(crate) mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), )?; if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) { @@ -1829,7 +1830,7 @@ pub(crate) mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) }) .unwrap()?; @@ -1901,7 +1902,7 @@ pub(crate) mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), )?; if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) { @@ -1920,7 +1921,7 @@ pub(crate) mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) }) .unwrap()?; @@ -1982,7 +1983,7 @@ pub(crate) mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2001,7 +2002,7 @@ pub(crate) mod tests { &document_changes, embedders, &|| should_abort.load(Relaxed), - &|_| (), + &Progress::default(), ) }) .unwrap() diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index 1fc876f79..3ae0bfdb9 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -31,6 +31,7 @@ pub mod vector; #[macro_use] pub mod snapshot_tests; mod fieldids_weights_map; +pub mod progress; use std::collections::{BTreeMap, HashMap}; use std::convert::{TryFrom, TryInto}; diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs new file mode 100644 index 000000000..63f0fbef8 --- /dev/null +++ b/crates/milli/src/progress.rs @@ -0,0 +1,116 @@ +use std::{ + any::TypeId, + borrow::Cow, + sync::{ + atomic::{AtomicU32, Ordering}, + Arc, RwLock, + }, +}; + +use serde::Serialize; + +pub trait Step: 'static + 
Send + Sync { + fn name(&self) -> Cow<'static, str>; + fn current(&self) -> u32; + fn total(&self) -> u32; +} + +#[derive(Clone, Default)] +pub struct Progress { + steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>, +} + +impl Progress { + pub fn update_progress<P: Step>(&self, sub_progress: P) { + let mut steps = self.steps.write().unwrap(); + let step_type = TypeId::of::<P>
(); + if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) { + steps.truncate(idx); + } + steps.push((step_type, Box::new(sub_progress))); + } + + // TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types + pub fn as_progress_view(&self) -> ProgressView { + let steps = self.steps.read().unwrap(); + + let mut percentage = 0.0; + let mut prev_factors = 1.0; + + let mut step_view = Vec::new(); + for (_, step) in steps.iter() { + prev_factors *= step.total() as f32; + percentage += step.current() as f32 / prev_factors; + + step_view.push(ProgressStepView { + name: step.name(), + finished: step.current(), + total: step.total(), + }); + } + + ProgressView { steps: step_view, percentage: percentage * 100.0 } + } +} + +/// This trait lets you use the AtomicSubStep defined right below. +/// The name must be a const that never changed but that can't be enforced by the type system because it make the trait non object-safe. +/// By forcing the Default trait + the &'static str we make it harder to miss-use the trait. +pub trait NamedStep: 'static + Send + Sync + Default { + fn name(&self) -> &'static str; +} + +/// Structure to quickly define steps that need very quick, lockless updating of their current step. +/// You can use this struct if: +/// - The name of the step doesn't change +/// - The total number of steps doesn't change +pub struct AtomicSubStep { + name: Name, + current: Arc, + total: u32, +} + +impl AtomicSubStep { + pub fn new(total: u32) -> (Arc, Self) { + let current = Arc::new(AtomicU32::new(0)); + (current.clone(), Self { current, total, name: Name::default() }) + } +} + +impl Step for AtomicSubStep { + fn name(&self) -> Cow<'static, str> { + self.name.name().into() + } + + fn current(&self) -> u32 { + self.current.load(Ordering::Relaxed) + } + + fn total(&self) -> u32 { + self.total + } +} + +#[derive(Default)] +pub struct Document {} + +impl NamedStep for Document { + fn name(&self) -> &'static str { + "document" + } +} + +pub type AtomicDocumentStep = AtomicSubStep; + +#[derive(Debug, Serialize, Clone)] +pub struct ProgressView { + steps: Vec, + percentage: f32, +} + +#[derive(Debug, Serialize, Clone)] +pub struct ProgressStepView { + name: Cow<'static, str>, + finished: u32, + total: u32, +} diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index 5db5b400b..04d3b6667 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -5,6 +5,7 @@ use bumpalo::Bump; use heed::EnvOpenOptions; use maplit::{btreemap, hashset}; +use crate::progress::Progress; use crate::update::new::indexer; use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings}; use crate::vector::EmbeddingConfigs; @@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -91,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 3988b311c..bae8e00b4 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -766,6 +766,7 @@ mod tests { use crate::documents::mmap_from_objects; use 
crate::index::tests::TempIndex; use crate::index::IndexEmbeddingConfig; + use crate::progress::Progress; use crate::search::TermsMatchingStrategy; use crate::update::new::indexer; use crate::update::Setting; @@ -1964,7 +1965,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2148,7 +2149,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2163,7 +2164,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2210,7 +2211,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2225,7 +2226,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2263,7 +2264,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2278,7 +2279,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2315,7 +2316,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2330,7 +2331,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2369,7 +2370,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2384,7 +2385,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2428,7 +2429,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2443,7 +2444,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2480,7 +2481,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2495,7 +2496,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2532,7 +2533,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2547,7 +2548,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2726,7 +2727,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2741,7 +2742,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2785,7 +2786,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2800,7 +2801,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); @@ -2841,7 +2842,7 @@ mod tests { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -2856,7 +2857,7 @@ mod tests { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) .unwrap(); wtxn.commit().unwrap(); diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 
b865d0a35..66ed6cbfb 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd; use crate::update::new::channel::FieldIdDocidFacetSender; use crate::update::new::extract::perm_json_p; use crate::update::new::indexer::document_changes::{ - extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, + extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, }; use crate::update::new::ref_cell_ext::RefCellExt as _; -use crate::update::new::steps::Step; +use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{FullySend, ThreadLocal}; use crate::update::new::DocumentChange; use crate::update::GrenadParameters; @@ -373,26 +373,16 @@ fn truncate_str(s: &str) -> &str { impl FacetedDocidsExtractor { #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")] - pub fn run_extraction< - 'pl, - 'fid, - 'indexer, - 'index, - 'extractor, - DC: DocumentChanges<'pl>, - MSP, - SP, - >( + pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( grenad_parameters: GrenadParameters, document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, + indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, sender: &FieldIdDocidFacetSender, - step: Step, + step: IndexingStep, ) -> Result>> where MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, { let index = indexing_context.index; let rtxn = index.read_txn()?; diff --git a/crates/milli/src/update/new/extract/mod.rs b/crates/milli/src/update/new/extract/mod.rs index 0bdf31635..4bcb918e4 100644 --- a/crates/milli/src/update/new/extract/mod.rs +++ b/crates/milli/src/update/new/extract/mod.rs @@ -15,23 +15,22 @@ pub use geo::*; pub use searchable::*; pub use vectors::EmbeddingExtractor; -use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress}; -use super::steps::Step; +use super::indexer::document_changes::{DocumentChanges, IndexingContext}; +use super::steps::IndexingStep; use super::thread_local::{FullySend, ThreadLocal}; use crate::update::GrenadParameters; use crate::Result; pub trait DocidsExtractor { - fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( grenad_parameters: GrenadParameters, document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, + indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, - step: Step, + step: IndexingStep, ) -> Result>> where - MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync; + MSP: Fn() -> bool + Sync; } /// TODO move in permissive json pointer diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs index 5e85eb1c8..952ee91e4 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; use crate::update::new::extract::cache::BalancedCaches; use crate::update::new::extract::perm_json_p::contained_in; use 
crate::update::new::indexer::document_changes::{ - extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, + extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, }; use crate::update::new::ref_cell_ext::RefCellExt as _; -use crate::update::new::steps::Step; +use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; use crate::update::new::DocumentChange; use crate::update::GrenadParameters; @@ -239,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> { pub struct WordDocidsExtractors; impl WordDocidsExtractors { - pub fn run_extraction< - 'pl, - 'fid, - 'indexer, - 'index, - 'extractor, - DC: DocumentChanges<'pl>, - MSP, - SP, - >( + pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( grenad_parameters: GrenadParameters, document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, + indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, - step: Step, + step: IndexingStep, ) -> Result> where MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, { let index = indexing_context.index; let rtxn = index.read_txn()?; diff --git a/crates/milli/src/update/new/extract/searchable/mod.rs b/crates/milli/src/update/new/extract/searchable/mod.rs index 05d2406d9..c4240196a 100644 --- a/crates/milli/src/update/new/extract/searchable/mod.rs +++ b/crates/milli/src/update/new/extract/searchable/mod.rs @@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer}; use super::cache::BalancedCaches; use super::DocidsExtractor; use crate::update::new::indexer::document_changes::{ - extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, + extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, }; -use crate::update::new::steps::Step; +use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{FullySend, ThreadLocal}; use crate::update::new::DocumentChange; use crate::update::GrenadParameters; @@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> } pub trait SearchableExtractor: Sized + Sync { - fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( grenad_parameters: GrenadParameters, document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, + indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, - step: Step, + step: IndexingStep, ) -> Result>> where MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, { let rtxn = indexing_context.index.read_txn()?; let stop_words = indexing_context.index.stop_words(&rtxn)?; @@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync { } impl DocidsExtractor for T { - fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>( + fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( grenad_parameters: GrenadParameters, document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, + indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, - step: Step, + step: IndexingStep, ) -> 
Result>> where MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, { Self::run_extraction( grenad_parameters, diff --git a/crates/milli/src/update/new/indexer/document_changes.rs b/crates/milli/src/update/new/indexer/document_changes.rs index 2a5c25525..f2edfb1f3 100644 --- a/crates/milli/src/update/new/indexer/document_changes.rs +++ b/crates/milli/src/update/new/indexer/document_changes.rs @@ -1,4 +1,5 @@ use std::cell::{Cell, RefCell}; +use std::sync::atomic::Ordering; use std::sync::{Arc, RwLock}; use bumpalo::Bump; @@ -7,8 +8,9 @@ use rayon::iter::IndexedParallelIterator; use super::super::document_change::DocumentChange; use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; +use crate::progress::{AtomicDocumentStep, Progress}; use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _; -use crate::update::new::steps::Step; +use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result}; @@ -133,10 +135,8 @@ pub struct IndexingContext< 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation 'index, // covariant lifetime of the index MSP, - SP, > where MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, { pub index: &'index Index, pub db_fields_ids_map: &'indexer FieldsIdsMap, @@ -144,7 +144,8 @@ pub struct IndexingContext< pub doc_allocs: &'indexer ThreadLocal>>, pub fields_ids_map_store: &'indexer ThreadLocal>>>, pub must_stop_processing: &'indexer MSP, - pub send_progress: &'indexer SP, + // TODO: TAMO: Rename field to progress + pub send_progress: &'indexer Progress, } impl< @@ -152,18 +153,15 @@ impl< 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation 'index, // covariant lifetime of the index MSP, - SP, > Copy for IndexingContext< 'fid, // invariant lifetime of fields ids map 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation 'index, // covariant lifetime of the index MSP, - SP, > where MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, { } @@ -172,18 +170,15 @@ impl< 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation 'index, // covariant lifetime of the index MSP, - SP, > Clone for IndexingContext< 'fid, // invariant lifetime of fields ids map 'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation 'index, // covariant lifetime of the index MSP, - SP, > where MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, { fn clone(&self) -> Self { *self @@ -202,7 +197,6 @@ pub fn extract< EX, DC: DocumentChanges<'pl>, MSP, - SP, >( document_changes: &DC, extractor: &EX, @@ -214,17 +208,17 @@ pub fn extract< fields_ids_map_store, must_stop_processing, send_progress, - }: IndexingContext<'fid, 'indexer, 'index, MSP, SP>, + }: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, datastore: &'data ThreadLocal, - step: Step, + step: IndexingStep, ) -> Result<()> where EX: Extractor<'extractor>, MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, { tracing::trace!("We are resetting the extractor allocators"); + send_progress.update_progress(step); // Clean up and reuse the extractor allocs for extractor_alloc in extractor_allocs.iter_mut() { tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes()); @@ -232,6 +226,8 @@ where } let total_documents = 
document_changes.len() as u32; + let (step, progress_step) = AtomicDocumentStep::new(total_documents); + send_progress.update_progress(progress_step); let pi = document_changes.iter(CHUNK_SIZE); pi.enumerate().try_arc_for_each_try_init( @@ -253,7 +249,7 @@ where } let finished_documents = (finished_documents * CHUNK_SIZE) as u32; - (send_progress)(Progress::from_step_substep(step, finished_documents, total_documents)); + step.store(finished_documents, Ordering::Relaxed); // Clean up and reuse the document-specific allocator context.doc_alloc.reset(); @@ -271,32 +267,7 @@ where res }, )?; - - (send_progress)(Progress::from_step_substep(step, total_documents, total_documents)); + step.store(total_documents, Ordering::Relaxed); Ok(()) } - -pub struct Progress { - pub finished_steps: u16, - pub total_steps: u16, - pub step_name: &'static str, - pub finished_total_substep: Option<(u32, u32)>, -} - -impl Progress { - pub fn from_step(step: Step) -> Self { - Self { - finished_steps: step.finished_steps(), - total_steps: Step::total_steps(), - step_name: step.name(), - finished_total_substep: None, - } - } - pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self { - Self { - finished_total_substep: Some((finished_substep, total_substep)), - ..Progress::from_step(step) - } - } -} diff --git a/crates/milli/src/update/new/indexer/document_deletion.rs b/crates/milli/src/update/new/indexer/document_deletion.rs index 518786e6f..33e69e49c 100644 --- a/crates/milli/src/update/new/indexer/document_deletion.rs +++ b/crates/milli/src/update/new/indexer/document_deletion.rs @@ -92,11 +92,12 @@ mod test { use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::index::tests::TempIndex; + use crate::progress::Progress; use crate::update::new::indexer::document_changes::{ extract, DocumentChangeContext, Extractor, IndexingContext, }; use crate::update::new::indexer::DocumentDeletion; - use crate::update::new::steps::Step; + use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{MostlySend, ThreadLocal}; use crate::update::new::DocumentChange; use crate::DocumentId; @@ -164,7 +165,7 @@ mod test { doc_allocs: &doc_allocs, fields_ids_map_store: &fields_ids_map_store, must_stop_processing: &(|| false), - send_progress: &(|_progress| {}), + send_progress: &Progress::default(), }; for _ in 0..3 { @@ -176,7 +177,7 @@ mod test { context, &mut extractor_allocs, &datastore, - Step::ExtractingDocuments, + IndexingStep::ExtractingDocuments, ) .unwrap(); diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs index 0b7ec493e..0ce53d5d2 100644 --- a/crates/milli/src/update/new/indexer/document_operation.rs +++ b/crates/milli/src/update/new/indexer/document_operation.rs @@ -1,3 +1,5 @@ +use std::sync::atomic::Ordering; + use bumpalo::collections::CollectIn; use bumpalo::Bump; use bumparaw_collections::RawMap; @@ -10,11 +12,12 @@ use serde_json::value::RawValue; use serde_json::Deserializer; use super::super::document_change::DocumentChange; -use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress}; +use super::document_changes::{DocumentChangeContext, DocumentChanges}; use super::retrieve_or_guess_primary_key; use crate::documents::PrimaryKey; +use crate::progress::{AtomicSubStep, Progress}; use crate::update::new::document::Versions; -use crate::update::new::steps::Step; +use crate::update::new::steps::IndexingStep; use 
crate::update::new::thread_local::MostlySend; use crate::update::new::{Deletion, Insertion, Update}; use crate::update::{AvailableIds, IndexDocumentsMethod}; @@ -45,7 +48,7 @@ impl<'pl> DocumentOperation<'pl> { #[allow(clippy::too_many_arguments)] #[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")] - pub fn into_changes( + pub fn into_changes( self, indexer: &'pl Bump, index: &Index, @@ -53,12 +56,12 @@ impl<'pl> DocumentOperation<'pl> { primary_key_from_op: Option<&'pl str>, new_fields_ids_map: &mut FieldsIdsMap, must_stop_processing: &MSP, - send_progress: &SP, + progress: Progress, ) -> Result<(DocumentOperationChanges<'pl>, Vec, Option>)> where MSP: Fn() -> bool, - SP: Fn(Progress), { + progress.update_progress(IndexingStep::PreparingPayloads); let Self { operations, method } = self; let documents_ids = index.documents_ids(rtxn)?; @@ -68,16 +71,15 @@ impl<'pl> DocumentOperation<'pl> { let mut primary_key = None; let payload_count = operations.len(); + let (step, progress_step) = + AtomicSubStep::::new(payload_count as u32); + progress.update_progress(progress_step); for (payload_index, operation) in operations.into_iter().enumerate() { if must_stop_processing() { return Err(InternalError::AbortedIndexation.into()); } - send_progress(Progress::from_step_substep( - Step::PreparingPayloads, - payload_index as u32, - payload_count as u32, - )); + step.store(payload_index as u32, Ordering::Relaxed); let mut bytes = 0; let result = match operation { @@ -118,12 +120,7 @@ impl<'pl> DocumentOperation<'pl> { }; operations_stats.push(PayloadStats { document_count, bytes, error }); } - - send_progress(Progress::from_step_substep( - Step::PreparingPayloads, - payload_count as u32, - payload_count as u32, - )); + step.store(payload_count as u32, Ordering::Relaxed); // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> = diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 601645385..79416bcd5 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -5,7 +5,7 @@ use std::thread::{self, Builder}; use big_s::S; use bumparaw_collections::RawMap; -use document_changes::{extract, DocumentChanges, IndexingContext, Progress}; +use document_changes::{extract, DocumentChanges, IndexingContext}; pub use document_deletion::DocumentDeletion; pub use document_operation::{DocumentOperation, PayloadStats}; use hashbrown::HashMap; @@ -22,7 +22,7 @@ use super::channel::*; use super::extract::*; use super::facet_search_builder::FacetSearchBuilder; use super::merger::FacetFieldIdsDelta; -use super::steps::Step; +use super::steps::IndexingStep; use super::thread_local::ThreadLocal; use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder}; use super::words_prefix_docids::{ @@ -33,6 +33,7 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY}; use crate::facet::FacetType; use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; +use crate::progress::Progress; use crate::proximity::ProximityPrecision; use crate::update::del_add::DelAdd; use crate::update::new::extract::EmbeddingExtractor; @@ -60,7 +61,7 @@ mod update_by_function; /// /// TODO return stats #[allow(clippy::too_many_arguments)] // clippy: 😝 -pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( +pub fn index<'pl, 
'indexer, 'index, DC, MSP>( wtxn: &mut RwTxn, index: &'index Index, pool: &ThreadPoolNoAbort, @@ -71,12 +72,11 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( document_changes: &DC, embedders: EmbeddingConfigs, must_stop_processing: &'indexer MSP, - send_progress: &'indexer SP, + send_progress: &'indexer Progress, ) -> Result<()> where DC: DocumentChanges<'pl>, MSP: Fn() -> bool + Sync, - SP: Fn(Progress) + Sync, { let mut bbbuffers = Vec::new(); let finished_extraction = AtomicBool::new(false); @@ -159,7 +159,7 @@ where indexing_context, &mut extractor_allocs, &datastore, - Step::ExtractingDocuments, + IndexingStep::ExtractingDocuments, )?; } { @@ -191,7 +191,7 @@ where indexing_context, &mut extractor_allocs, &extractor_sender.field_id_docid_facet_sender(), - Step::ExtractingFacets + IndexingStep::ExtractingFacets )? }; @@ -224,7 +224,7 @@ where document_changes, indexing_context, &mut extractor_allocs, - Step::ExtractingWords + IndexingStep::ExtractingWords )? }; @@ -302,7 +302,7 @@ where document_changes, indexing_context, &mut extractor_allocs, - Step::ExtractingWordProximity, + IndexingStep::ExtractingWordProximity, )? }; @@ -338,7 +338,7 @@ where indexing_context, &mut extractor_allocs, &datastore, - Step::ExtractingEmbeddings, + IndexingStep::ExtractingEmbeddings, )?; } { @@ -371,7 +371,7 @@ where indexing_context, &mut extractor_allocs, &datastore, - Step::WritingGeoPoints + IndexingStep::WritingGeoPoints )?; } @@ -383,9 +383,7 @@ where &indexing_context.must_stop_processing, )?; } - - (indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase)); - + indexing_context.send_progress.update_progress(IndexingStep::WritingToDatabase); finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); Result::Ok((facet_field_ids_delta, index_embeddings)) @@ -485,7 +483,7 @@ where )?; } - (indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); + indexing_context.send_progress.update_progress(IndexingStep::WaitingForExtractors); let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?; @@ -498,10 +496,9 @@ where break 'vectors; } - (indexing_context.send_progress)(Progress::from_step( - Step::WritingEmbeddingsToDatabase, - )); - + indexing_context + .send_progress + .update_progress(IndexingStep::WritingEmbeddingsToDatabase); let mut rng = rand::rngs::StdRng::seed_from_u64(42); for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers { let dimensions = *dimensions; @@ -517,21 +514,19 @@ where index.put_embedding_configs(wtxn, index_embeddings)?; } - (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets)); - + indexing_context.send_progress.update_progress(IndexingStep::PostProcessingFacets); if index.facet_search(wtxn)? { compute_facet_search_database(index, wtxn, global_fields_ids_map)?; } compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; - (indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords)); - + indexing_context.send_progress.update_progress(IndexingStep::PostProcessingWords); if let Some(prefix_delta) = compute_word_fst(index, wtxn)? 
{ compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?; } - (indexing_context.send_progress)(Progress::from_step(Step::Finalizing)); + indexing_context.send_progress.update_progress(IndexingStep::Finalizing); Ok(()) as Result<_> })?; diff --git a/crates/milli/src/update/new/steps.rs b/crates/milli/src/update/new/steps.rs index bee1be260..9eb7d376d 100644 --- a/crates/milli/src/update/new/steps.rs +++ b/crates/milli/src/update/new/steps.rs @@ -1,8 +1,12 @@ +use std::borrow::Cow; + use enum_iterator::Sequence; +use crate::progress::Step; + #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] -#[repr(u16)] -pub enum Step { +#[repr(u8)] +pub enum IndexingStep { PreparingPayloads, ExtractingDocuments, ExtractingFacets, @@ -18,30 +22,31 @@ pub enum Step { Finalizing, } -impl Step { - pub fn name(&self) -> &'static str { +impl Step for IndexingStep { + fn name(&self) -> Cow<'static, str> { match self { - Step::PreparingPayloads => "preparing update file", - Step::ExtractingDocuments => "extracting documents", - Step::ExtractingFacets => "extracting facets", - Step::ExtractingWords => "extracting words", - Step::ExtractingWordProximity => "extracting word proximity", - Step::ExtractingEmbeddings => "extracting embeddings", - Step::WritingGeoPoints => "writing geo points", - Step::WritingToDatabase => "writing to database", - Step::WaitingForExtractors => "waiting for extractors", - Step::WritingEmbeddingsToDatabase => "writing embeddings to database", - Step::PostProcessingFacets => "post-processing facets", - Step::PostProcessingWords => "post-processing words", - Step::Finalizing => "finalizing", + IndexingStep::PreparingPayloads => "preparing update file", + IndexingStep::ExtractingDocuments => "extracting documents", + IndexingStep::ExtractingFacets => "extracting facets", + IndexingStep::ExtractingWords => "extracting words", + IndexingStep::ExtractingWordProximity => "extracting word proximity", + IndexingStep::ExtractingEmbeddings => "extracting embeddings", + IndexingStep::WritingGeoPoints => "writing geo points", + IndexingStep::WritingToDatabase => "writing to database", + IndexingStep::WaitingForExtractors => "waiting for extractors", + IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database", + IndexingStep::PostProcessingFacets => "post-processing facets", + IndexingStep::PostProcessingWords => "post-processing words", + IndexingStep::Finalizing => "finalizing", } + .into() } - pub fn finished_steps(self) -> u16 { - self as u16 + fn current(&self) -> u32 { + *self as u32 } - pub const fn total_steps() -> u16 { - Self::CARDINALITY as u16 + fn total(&self) -> u32 { + Self::CARDINALITY as u32 } } diff --git a/crates/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs index 418cdc356..ced81409d 100644 --- a/crates/milli/tests/search/facet_distribution.rs +++ b/crates/milli/tests/search/facet_distribution.rs @@ -3,6 +3,7 @@ use bumpalo::Bump; use heed::EnvOpenOptions; use maplit::hashset; use milli::documents::mmap_from_objects; +use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; @@ -57,7 +58,7 @@ fn test_facet_distribution_with_no_facet_values() { None, &mut new_fields_ids_map, &|| false, - &|_progress| (), + Progress::default(), ) .unwrap(); @@ -72,7 +73,7 @@ fn test_facet_distribution_with_no_facet_values() { &document_changes, embedders, &|| false, - &|_| (), + &Progress::default(), ) 
     .unwrap();
 
diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs
index 08b22d7b6..30690969b 100644
--- a/crates/milli/tests/search/mod.rs
+++ b/crates/milli/tests/search/mod.rs
@@ -7,6 +7,7 @@ use bumpalo::Bump;
 use either::{Either, Left, Right};
 use heed::EnvOpenOptions;
 use maplit::{btreemap, hashset};
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -90,7 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
 
@@ -109,7 +110,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
         &document_changes,
         embedders,
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();
 
diff --git a/crates/milli/tests/search/query_criteria.rs b/crates/milli/tests/search/query_criteria.rs
index 8401f0444..304059915 100644
--- a/crates/milli/tests/search/query_criteria.rs
+++ b/crates/milli/tests/search/query_criteria.rs
@@ -5,6 +5,7 @@ use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use itertools::Itertools;
 use maplit::hashset;
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -326,7 +327,7 @@ fn criteria_ascdesc() {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
 
@@ -341,7 +342,7 @@ fn criteria_ascdesc() {
         &document_changes,
         embedders,
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();
 
diff --git a/crates/milli/tests/search/typo_tolerance.rs b/crates/milli/tests/search/typo_tolerance.rs
index dbee296ee..d33d79e54 100644
--- a/crates/milli/tests/search/typo_tolerance.rs
+++ b/crates/milli/tests/search/typo_tolerance.rs
@@ -3,6 +3,7 @@ use std::collections::BTreeSet;
 use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use milli::documents::mmap_from_objects;
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -135,7 +136,7 @@ fn test_typo_disabled_on_word() {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
 
@@ -150,7 +151,7 @@ fn test_typo_disabled_on_word() {
         &document_changes,
         embedders,
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();
 

From 6f4823fc9728236bd78c2f09affb7c1b1ae514ff Mon Sep 17 00:00:00 2001
From: Tamo
Date: Tue, 10 Dec 2024 16:58:13 +0100
Subject: [PATCH 131/158] make the number of documents in the document tasks more incremental

---
 crates/milli/src/update/new/indexer/document_changes.rs   | 8 +++-----
 crates/milli/src/update/new/indexer/document_operation.rs | 5 ++---
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/crates/milli/src/update/new/indexer/document_changes.rs b/crates/milli/src/update/new/indexer/document_changes.rs
index f2edfb1f3..799763658 100644
--- a/crates/milli/src/update/new/indexer/document_changes.rs
+++ b/crates/milli/src/update/new/indexer/document_changes.rs
@@ -230,7 +230,7 @@ where
     send_progress.update_progress(progress_step);
 
     let pi = document_changes.iter(CHUNK_SIZE);
-    pi.enumerate().try_arc_for_each_try_init(
+    pi.try_arc_for_each_try_init(
         || {
             DocumentChangeContext::new(
                 index,
@@ -243,13 +243,10 @@ where
                 move |index_alloc| extractor.init_data(index_alloc),
             )
         },
-        |context, (finished_documents, items)| {
+        |context, items| {
            if (must_stop_processing)() {
                return Err(Arc::new(InternalError::AbortedIndexation.into()));
            }
-            let finished_documents = (finished_documents * CHUNK_SIZE) as u32;
-
-            step.store(finished_documents, Ordering::Relaxed);
 
            // Clean up and reuse the document-specific allocator
            context.doc_alloc.reset();
@@ -260,6 +257,7 @@ where
            });
 
            let res = extractor.process(changes, context).map_err(Arc::new);
+            step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
 
            // send back the doc_alloc in the pool
            context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));
diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs
index 0ce53d5d2..4418944db 100644
--- a/crates/milli/src/update/new/indexer/document_operation.rs
+++ b/crates/milli/src/update/new/indexer/document_operation.rs
@@ -15,7 +15,7 @@ use super::super::document_change::DocumentChange;
 use super::document_changes::{DocumentChangeContext, DocumentChanges};
 use super::retrieve_or_guess_primary_key;
 use crate::documents::PrimaryKey;
-use crate::progress::{AtomicSubStep, Progress};
+use crate::progress::{AtomicDocumentStep, Progress};
 use crate::update::new::document::Versions;
 use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::MostlySend;
@@ -71,8 +71,7 @@ impl<'pl> DocumentOperation<'pl> {
        let mut primary_key = None;
 
        let payload_count = operations.len();
-        let (step, progress_step) =
-            AtomicSubStep::::new(payload_count as u32);
+        let (step, progress_step) = AtomicDocumentStep::new(payload_count as u32);
        progress.update_progress(progress_step);
 
        for (payload_index, operation) in operations.into_iter().enumerate() {

From 867e6a8f1dc2fc7a1fbc5e351335213f2eb8ea6c Mon Sep 17 00:00:00 2001
From: Tamo
Date: Tue, 10 Dec 2024 17:04:04 +0100
Subject: [PATCH 132/158] rename the send_progress field to progress since it's not sending anything

---
 .../src/update/new/indexer/document_changes.rs  |  5 ++---
 .../src/update/new/indexer/document_deletion.rs |  2 +-
 crates/milli/src/update/new/indexer/mod.rs      | 16 +++++++---------
 3 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/crates/milli/src/update/new/indexer/document_changes.rs b/crates/milli/src/update/new/indexer/document_changes.rs
index 799763658..3e2b9c036 100644
--- a/crates/milli/src/update/new/indexer/document_changes.rs
+++ b/crates/milli/src/update/new/indexer/document_changes.rs
@@ -144,8 +144,7 @@ pub struct IndexingContext<
     pub doc_allocs: &'indexer ThreadLocal>>,
     pub fields_ids_map_store: &'indexer ThreadLocal>>>,
     pub must_stop_processing: &'indexer MSP,
-    // TODO: TAMO: Rename field to progress
-    pub send_progress: &'indexer Progress,
+    pub progress: &'indexer Progress,
 }
 
 impl<
@@ -207,7 +206,7 @@ pub fn extract<
         doc_allocs,
         fields_ids_map_store,
         must_stop_processing,
-        send_progress,
+        progress: send_progress,
     }: IndexingContext<'fid, 'indexer, 'index, MSP>,
     extractor_allocs: &'extractor mut ThreadLocal>,
     datastore: &'data ThreadLocal,
diff --git a/crates/milli/src/update/new/indexer/document_deletion.rs b/crates/milli/src/update/new/indexer/document_deletion.rs
index 33e69e49c..b42a6c859 100644
--- a/crates/milli/src/update/new/indexer/document_deletion.rs
+++ b/crates/milli/src/update/new/indexer/document_deletion.rs
@@ -165,7 +165,7 @@ mod test {
             doc_allocs: &doc_allocs,
             fields_ids_map_store: &fields_ids_map_store,
             must_stop_processing: &(|| false),
-            send_progress: &Progress::default(),
+            progress: &Progress::default(),
         };
 
        for _
in 0..3 { diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 79416bcd5..acdf78304 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -125,7 +125,7 @@ where doc_allocs: &doc_allocs, fields_ids_map_store: &fields_ids_map_store, must_stop_processing, - send_progress, + progress: send_progress, }; let mut index_embeddings = index.embedding_configs(wtxn)?; @@ -383,7 +383,7 @@ where &indexing_context.must_stop_processing, )?; } - indexing_context.send_progress.update_progress(IndexingStep::WritingToDatabase); + indexing_context.progress.update_progress(IndexingStep::WritingToDatabase); finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); Result::Ok((facet_field_ids_delta, index_embeddings)) @@ -483,7 +483,7 @@ where )?; } - indexing_context.send_progress.update_progress(IndexingStep::WaitingForExtractors); + indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors); let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?; @@ -496,9 +496,7 @@ where break 'vectors; } - indexing_context - .send_progress - .update_progress(IndexingStep::WritingEmbeddingsToDatabase); + indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase); let mut rng = rand::rngs::StdRng::seed_from_u64(42); for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers { let dimensions = *dimensions; @@ -514,19 +512,19 @@ where index.put_embedding_configs(wtxn, index_embeddings)?; } - indexing_context.send_progress.update_progress(IndexingStep::PostProcessingFacets); + indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets); if index.facet_search(wtxn)? { compute_facet_search_database(index, wtxn, global_fields_ids_map)?; } compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; - indexing_context.send_progress.update_progress(IndexingStep::PostProcessingWords); + indexing_context.progress.update_progress(IndexingStep::PostProcessingWords); if let Some(prefix_delta) = compute_word_fst(index, wtxn)? 
{ compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?; } - indexing_context.send_progress.update_progress(IndexingStep::Finalizing); + indexing_context.progress.update_progress(IndexingStep::Finalizing); Ok(()) as Result<_> })?; From ab75f53efdd2f408d95a9bfa187ad5ade93a0e7d Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 10 Dec 2024 17:09:10 +0100 Subject: [PATCH 133/158] update all snapshots --- crates/index-scheduler/src/lib.rs | 10 ++++++++++ crates/meilisearch/tests/batches/mod.rs | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index f5f73087d..d3e65c6f8 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -4308,6 +4308,16 @@ mod tests { snapshot!(batch, @r#" { "uid": 0, + "progress": { + "steps": [ + { + "name": "processing tasks", + "finished": 0, + "total": 2 + } + ], + "percentage": 0.0 + }, "details": { "primaryKey": "mouse" }, diff --git a/crates/meilisearch/tests/batches/mod.rs b/crates/meilisearch/tests/batches/mod.rs index 9c869c140..581e92837 100644 --- a/crates/meilisearch/tests/batches/mod.rs +++ b/crates/meilisearch/tests/batches/mod.rs @@ -284,6 +284,7 @@ async fn test_summarized_document_addition_or_update() { @r#" { "uid": 0, + "progress": null, "details": { "receivedDocuments": 1, "indexedDocuments": 1 @@ -314,6 +315,7 @@ async fn test_summarized_document_addition_or_update() { @r#" { "uid": 1, + "progress": null, "details": { "receivedDocuments": 1, "indexedDocuments": 1 @@ -349,6 +351,7 @@ async fn test_summarized_delete_documents_by_batch() { @r#" { "uid": 0, + "progress": null, "details": { "providedIds": 3, "deletedDocuments": 0 @@ -380,6 +383,7 @@ async fn test_summarized_delete_documents_by_batch() { @r#" { "uid": 2, + "progress": null, "details": { "providedIds": 1, "deletedDocuments": 0 @@ -416,6 +420,7 @@ async fn test_summarized_delete_documents_by_filter() { @r#" { "uid": 0, + "progress": null, "details": { "providedIds": 0, "deletedDocuments": 0, @@ -448,6 +453,7 @@ async fn test_summarized_delete_documents_by_filter() { @r#" { "uid": 2, + "progress": null, "details": { "providedIds": 0, "deletedDocuments": 0, @@ -480,6 +486,7 @@ async fn test_summarized_delete_documents_by_filter() { @r#" { "uid": 4, + "progress": null, "details": { "providedIds": 0, "deletedDocuments": 0, @@ -516,6 +523,7 @@ async fn test_summarized_delete_document_by_id() { @r#" { "uid": 0, + "progress": null, "details": { "providedIds": 1, "deletedDocuments": 0 @@ -547,6 +555,7 @@ async fn test_summarized_delete_document_by_id() { @r#" { "uid": 2, + "progress": null, "details": { "providedIds": 1, "deletedDocuments": 0 @@ -594,6 +603,7 @@ async fn test_summarized_settings_update() { @r#" { "uid": 0, + "progress": null, "details": { "displayedAttributes": [ "doggos", @@ -638,6 +648,7 @@ async fn test_summarized_index_creation() { @r#" { "uid": 0, + "progress": null, "details": {}, "stats": { "totalNbTasks": 1, @@ -665,6 +676,7 @@ async fn test_summarized_index_creation() { @r#" { "uid": 1, + "progress": null, "details": { "primaryKey": "doggos" }, @@ -809,6 +821,7 @@ async fn test_summarized_index_update() { @r#" { "uid": 0, + "progress": null, "details": {}, "stats": { "totalNbTasks": 1, @@ -836,6 +849,7 @@ async fn test_summarized_index_update() { @r#" { "uid": 1, + "progress": null, "details": { "primaryKey": "bones" }, @@ -868,6 +882,7 @@ async fn test_summarized_index_update() { @r#" { "uid": 3, + "progress": null, "details": {}, 
"stats": { "totalNbTasks": 1, @@ -895,6 +910,7 @@ async fn test_summarized_index_update() { @r#" { "uid": 4, + "progress": null, "details": { "primaryKey": "bones" }, @@ -932,6 +948,7 @@ async fn test_summarized_index_swap() { @r#" { "uid": 0, + "progress": null, "details": { "swaps": [ { @@ -972,6 +989,7 @@ async fn test_summarized_index_swap() { @r#" { "uid": 3, + "progress": null, "details": { "swaps": [ { @@ -1014,6 +1032,7 @@ async fn test_summarized_batch_cancelation() { @r#" { "uid": 1, + "progress": null, "details": { "matchedTasks": 1, "canceledTasks": 0, @@ -1051,6 +1070,7 @@ async fn test_summarized_batch_deletion() { @r#" { "uid": 1, + "progress": null, "details": { "matchedTasks": 1, "deletedTasks": 1, @@ -1084,6 +1104,7 @@ async fn test_summarized_dump_creation() { @r#" { "uid": 0, + "progress": null, "details": { "dumpUid": "[dumpUid]" }, From 26733c705d55be4788c9a513b3654d71498679ff Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 10 Dec 2024 22:29:31 +0100 Subject: [PATCH 134/158] add progress for the task deletion and task cancelation --- Cargo.lock | 1 + crates/index-scheduler/Cargo.toml | 1 + crates/index-scheduler/src/batch.rs | 57 +++++++++++++-- crates/index-scheduler/src/processing.rs | 88 ++++++++++++++++-------- 4 files changed, 115 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index de7dabc36..91c83fb13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2632,6 +2632,7 @@ dependencies = [ "bincode", "bumpalo", "bumparaw-collections", + "convert_case 0.6.0", "crossbeam-channel", "csv", "derive_builder 0.20.0", diff --git a/crates/index-scheduler/Cargo.toml b/crates/index-scheduler/Cargo.toml index 5d7eb1913..ec2f17f84 100644 --- a/crates/index-scheduler/Cargo.toml +++ b/crates/index-scheduler/Cargo.toml @@ -15,6 +15,7 @@ anyhow = "1.0.86" bincode = "1.3.3" bumpalo = "3.16.0" bumparaw-collections = "0.1.2" +convert_case = "0.6.0" csv = "1.3.0" derive_builder = "0.20.0" dump = { path = "../dump" } diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index 1bfa7f53b..fe055b185 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -22,6 +22,7 @@ use std::ffi::OsStr; use std::fmt; use std::fs::{self, File}; use std::io::BufWriter; +use std::sync::atomic::Ordering; use bumpalo::collections::CollectIn; use bumpalo::Bump; @@ -48,6 +49,9 @@ use time::OffsetDateTime; use uuid::Uuid; use crate::autobatcher::{self, BatchKind}; +use crate::processing::{ + AtomicBatchStep, AtomicTaskStep, TaskCancelationProgress, TaskDeletionProgress, +}; use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; use crate::{Error, IndexScheduler, Result, TaskId}; @@ -583,8 +587,13 @@ impl IndexScheduler { }; let rtxn = self.env.read_txn()?; - let mut canceled_tasks = - self.cancel_matched_tasks(&rtxn, task.uid, current_batch, matched_tasks)?; + let mut canceled_tasks = self.cancel_matched_tasks( + &rtxn, + task.uid, + current_batch, + matched_tasks, + &progress, + )?; task.status = Status::Succeeded; match &mut task.details { @@ -615,7 +624,8 @@ impl IndexScheduler { } let mut wtxn = self.env.write_txn()?; - let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?; + let mut deleted_tasks = + self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?; wtxn.commit()?; for task in tasks.iter_mut() { @@ -1664,7 +1674,10 @@ impl IndexScheduler { &self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap, + progress: &Progress, ) -> Result { + 
progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime); + // 1. Remove from this list the tasks that we are not allowed to delete let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?; let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone(); @@ -1683,6 +1696,8 @@ impl IndexScheduler { // The tasks that have been removed *per batches*. let mut affected_batches: HashMap = HashMap::new(); + let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); + progress.update_progress(task_progress); for task_id in to_delete_tasks.iter() { let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; @@ -1706,22 +1721,35 @@ impl IndexScheduler { if let Some(batch_uid) = task.batch_uid { affected_batches.entry(batch_uid).or_default().insert(task_id); } + atomic_progress.fetch_add(1, Ordering::Relaxed); } + progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata); + let (atomic_progress, task_progress) = AtomicTaskStep::new( + (affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32, + ); + progress.update_progress(task_progress); for index in affected_indexes.iter() { self.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?; + atomic_progress.fetch_add(1, Ordering::Relaxed); } for status in affected_statuses.iter() { self.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?; + atomic_progress.fetch_add(1, Ordering::Relaxed); } for kind in affected_kinds.iter() { self.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?; + atomic_progress.fetch_add(1, Ordering::Relaxed); } + progress.update_progress(TaskDeletionProgress::DeletingTasks); + let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32); + progress.update_progress(task_progress); for task in to_delete_tasks.iter() { self.all_tasks.delete(wtxn, &task)?; + atomic_progress.fetch_add(1, Ordering::Relaxed); } for canceled_by in affected_canceled_by { if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? { @@ -1733,6 +1761,9 @@ impl IndexScheduler { } } } + progress.update_progress(TaskDeletionProgress::DeletingBatches); + let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32); + progress.update_progress(batch_progress); for (batch_id, to_delete_tasks) in affected_batches { if let Some(mut tasks) = self.batch_to_tasks_mapping.get(wtxn, &batch_id)? { tasks -= &to_delete_tasks; @@ -1774,6 +1805,7 @@ impl IndexScheduler { } } } + atomic_progress.fetch_add(1, Ordering::Relaxed); } Ok(to_delete_tasks) @@ -1788,21 +1820,36 @@ impl IndexScheduler { cancel_task_id: TaskId, current_batch: &mut ProcessingBatch, matched_tasks: &RoaringBitmap, + progress: &Progress, ) -> Result> { + progress.update_progress(TaskCancelationProgress::RetrievingTasks); + // 1. Remove from this list the tasks that we are not allowed to cancel // Notice that only the _enqueued_ ones are cancelable and we should // have already aborted the indexation of the _processing_ ones let cancelable_tasks = self.get_status(rtxn, Status::Enqueued)?; let tasks_to_cancel = cancelable_tasks & matched_tasks; - // 2. We now have a list of tasks to cancel, cancel them - let mut tasks = self.get_existing_tasks(rtxn, tasks_to_cancel.iter())?; + let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); + progress.update_progress(progress_obj); + // 2. 
We now have a list of tasks to cancel, cancel them + let mut tasks = self.get_existing_tasks( + rtxn, + tasks_to_cancel.iter().inspect(|_| { + task_progress.fetch_add(1, Ordering::Relaxed); + }), + )?; + + progress.update_progress(TaskCancelationProgress::UpdatingTasks); + let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32); + progress.update_progress(progress_obj); for task in tasks.iter_mut() { task.status = Status::Canceled; task.canceled_by = Some(cancel_task_id); task.details = task.details.as_ref().map(|d| d.to_failed()); current_batch.processing(Some(task)); + task_progress.fetch_add(1, Ordering::Relaxed); } Ok(tasks) diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index e5e892927..f28fa0219 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -1,4 +1,5 @@ use crate::utils::ProcessingBatch; +use enum_iterator::Sequence; use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step}; use roaring::RoaringBitmap; use std::{borrow::Cow, sync::Arc}; @@ -54,39 +55,72 @@ impl ProcessingTasks { } } -#[repr(u8)] -#[derive(Copy, Clone)] -pub enum BatchProgress { - ProcessingTasks, - WritingTasksToDisk, -} - -impl Step for BatchProgress { - fn name(&self) -> Cow<'static, str> { - match self { - BatchProgress::ProcessingTasks => Cow::Borrowed("processing tasks"), - BatchProgress::WritingTasksToDisk => Cow::Borrowed("writing tasks to disk"), +macro_rules! make_enum_progress { + (enum $name:ident: $(- $variant:ident)+ ) => { + #[repr(u8)] + #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] + #[allow(clippy::enum_variant_names)] + pub enum $name { + $($variant),+ } - } - fn current(&self) -> u32 { - *self as u8 as u32 - } + impl Step for $name { + fn name(&self) -> Cow<'static, str> { + use convert_case::Casing; - fn total(&self) -> u32 { - 2 - } + match self { + $( + $name::$variant => stringify!($variant).from_case(convert_case::Case::Camel).to_case(convert_case::Case::Lower).into() + ),+ + } + } + + fn current(&self) -> u32 { + *self as u32 + } + + fn total(&self) -> u32 { + Self::CARDINALITY as u32 + } + } + }; } -#[derive(Default)] -pub struct Task {} - -impl NamedStep for Task { - fn name(&self) -> &'static str { - "task" - } +macro_rules! make_atomic_progress { + ($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => { + #[derive(Default, Debug, Clone, Copy)] + pub struct $struct_name {} + impl NamedStep for $struct_name { + fn name(&self) -> &'static str { + $step_name + } + } + pub type $atomic_struct_name = AtomicSubStep<$struct_name>; + }; } -pub type AtomicTaskStep = AtomicSubStep; + +make_enum_progress! { + enum BatchProgress: + - ProcessingTasks + - WritingTasksToDisk +} + +make_enum_progress! { + enum TaskCancelationProgress: + - RetrievingTasks + - UpdatingTasks +} + +make_enum_progress! 
{ + enum TaskDeletionProgress: + - DeletingTasksDateTime + - DeletingTasksMetadata + - DeletingTasks + - DeletingBatches +} + +make_atomic_progress!(Task alias AtomicTaskStep => "task" ); +make_atomic_progress!(Batch alias AtomicBatchStep => "batch" ); #[cfg(test)] mod test { From 786b0fabea2a979442923f32ffbecc8208671cf9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 16:18:12 +0100 Subject: [PATCH 135/158] implement the progress for almost all the tasks --- crates/index-scheduler/src/batch.rs | 106 +++++++++++++++++++++-- crates/index-scheduler/src/processing.rs | 103 ++++++++++++++++++++++ 2 files changed, 203 insertions(+), 6 deletions(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index fe055b185..733984043 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -50,7 +50,11 @@ use uuid::Uuid; use crate::autobatcher::{self, BatchKind}; use crate::processing::{ - AtomicBatchStep, AtomicTaskStep, TaskCancelationProgress, TaskDeletionProgress, + AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, AtomicUpdateFileStep, CreateIndexProgress, + DeleteIndexProgress, DocumentDeletionProgress, DocumentEditionProgress, + DocumentOperationProgress, DumpCreationProgress, InnerSwappingTwoIndexes, SettingsProgress, + SnapshotCreationProgress, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, + UpdateIndexProgress, VariableNameStep, }; use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch}; use crate::{Error, IndexScheduler, Result, TaskId}; @@ -651,6 +655,8 @@ impl IndexScheduler { Ok(tasks) } Batch::SnapshotCreation(mut tasks) => { + progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation); + fs::create_dir_all(&self.snapshots_path)?; let temp_snapshot_dir = tempfile::tempdir()?; @@ -671,6 +677,7 @@ impl IndexScheduler { // two read operations as the task processing is synchronous. // 2.1 First copy the LMDB env of the index-scheduler + progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler); let dst = temp_snapshot_dir.path().join("tasks"); fs::create_dir_all(&dst)?; self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; @@ -683,6 +690,11 @@ impl IndexScheduler { fs::create_dir_all(&update_files_dir)?; // 2.4 Only copy the update files of the enqueued tasks + progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles); + let enqueued = self.get_status(&rtxn, Status::Enqueued)?; + let (atomic, update_file_progress) = + AtomicUpdateFileStep::new(enqueued.len() as u32); + progress.update_progress(update_file_progress); for task_id in self.get_status(&rtxn, Status::Enqueued)? { let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; if let Some(content_uuid) = task.content_uuid() { @@ -690,11 +702,17 @@ impl IndexScheduler { let dst = update_files_dir.join(content_uuid.to_string()); fs::copy(src, dst)?; } + atomic.fetch_add(1, Ordering::Relaxed); } // 3. Snapshot every indexes - for result in self.index_mapper.index_mapping.iter(&rtxn)? { + progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes); + let index_mapping = self.index_mapper.index_mapping; + let nb_indexes = index_mapping.len(&rtxn)? 
as u32; + + for (i, result) in index_mapping.iter(&rtxn)?.enumerate() { let (name, uuid) = result?; + progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes)); let index = self.index_mapper.index(&rtxn, name)?; let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string()); fs::create_dir_all(&dst)?; @@ -706,6 +724,7 @@ impl IndexScheduler { drop(rtxn); // 4. Snapshot the auth LMDB env + progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys); let dst = temp_snapshot_dir.path().join("auth"); fs::create_dir_all(&dst)?; // TODO We can't use the open_auth_store_env function here but we should @@ -718,6 +737,7 @@ impl IndexScheduler { auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?; // 5. Copy and tarball the flat snapshot + progress.update_progress(SnapshotCreationProgress::CreateTheTarball); // 5.1 Find the original name of the database // TODO find a better way to get this path let mut base_path = self.env.path().to_owned(); @@ -750,6 +770,7 @@ impl IndexScheduler { Ok(tasks) } Batch::Dump(mut task) => { + progress.update_progress(DumpCreationProgress::StartTheDumpCreation); let started_at = OffsetDateTime::now_utc(); let (keys, instance_uid) = if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind { @@ -760,6 +781,7 @@ impl IndexScheduler { let dump = dump::DumpWriter::new(*instance_uid)?; // 1. dump the keys + progress.update_progress(DumpCreationProgress::DumpTheApiKeys); let mut dump_keys = dump.create_keys()?; for key in keys { dump_keys.push_key(key)?; @@ -769,7 +791,13 @@ impl IndexScheduler { let rtxn = self.env.read_txn()?; // 2. dump the tasks + progress.update_progress(DumpCreationProgress::DumpTheTasks); let mut dump_tasks = dump.create_tasks_queue()?; + + let (atomic, update_task_progress) = + AtomicTaskStep::new(self.all_tasks.len(&rtxn)? as u32); + progress.update_progress(update_task_progress); + for ret in self.all_tasks.iter(&rtxn)? { if self.must_stop_processing.get() { return Err(Error::AbortedTask); @@ -819,11 +847,22 @@ impl IndexScheduler { dump_content_file.flush()?; } } + atomic.fetch_add(1, Ordering::Relaxed); } dump_tasks.flush()?; // 3. Dump the indexes + progress.update_progress(DumpCreationProgress::DumpTheIndexes); + let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32; + let mut count = 0; self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> { + progress.update_progress(VariableNameStep::new( + uid.to_string(), + count, + nb_indexes, + )); + count += 1; + let rtxn = index.read_txn()?; let metadata = IndexMetadata { uid: uid.to_owned(), @@ -843,6 +882,12 @@ impl IndexScheduler { .embedding_configs(&rtxn) .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + let nb_documents = index + .number_of_documents(&rtxn) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))? + as u32; + let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents); + progress.update_progress(update_document_progress); let documents = index .all_documents(&rtxn) .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; @@ -912,6 +957,7 @@ impl IndexScheduler { } index_dumper.push_document(&document)?; + atomic.fetch_add(1, Ordering::Relaxed); } // 3.2. Dump the settings @@ -926,6 +972,7 @@ impl IndexScheduler { })?; // 4. 
Dump experimental feature settings + progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures); let features = self.features().runtime_features(); dump.create_experimental_features(features)?; @@ -936,6 +983,7 @@ impl IndexScheduler { if self.must_stop_processing.get() { return Err(Error::AbortedTask); } + progress.update_progress(DumpCreationProgress::CompressTheDump); let path = self.dumps_path.join(format!("{}.dump", dump_uid)); let file = File::create(path)?; dump.persist_to(BufWriter::new(file))?; @@ -995,6 +1043,8 @@ impl IndexScheduler { Ok(tasks) } Batch::IndexCreation { index_uid, primary_key, task } => { + progress.update_progress(CreateIndexProgress::CreatingTheIndex); + let wtxn = self.env.write_txn()?; if self.index_mapper.exists(&wtxn, &index_uid)? { return Err(Error::IndexAlreadyExists(index_uid)); @@ -1008,6 +1058,7 @@ impl IndexScheduler { ) } Batch::IndexUpdate { index_uid, primary_key, mut task } => { + progress.update_progress(UpdateIndexProgress::UpdatingTheIndex); let rtxn = self.env.read_txn()?; let index = self.index_mapper.index(&rtxn, &index_uid)?; @@ -1060,6 +1111,7 @@ impl IndexScheduler { Ok(vec![task]) } Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => { + progress.update_progress(DeleteIndexProgress::DeletingTheIndex); let wtxn = self.env.write_txn()?; // it's possible that the index doesn't exist @@ -1093,6 +1145,8 @@ impl IndexScheduler { Ok(tasks) } Batch::IndexSwap { mut task } => { + progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap); + let mut wtxn = self.env.write_txn()?; let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind { swaps @@ -1119,8 +1173,20 @@ impl IndexScheduler { )); } } - for swap in swaps { - self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?; + progress.update_progress(SwappingTheIndexes::SwappingTheIndexes); + for (step, swap) in swaps.iter().enumerate() { + progress.update_progress(VariableNameStep::new( + format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1), + step as u32, + swaps.len() as u32, + )); + self.apply_index_swap( + &mut wtxn, + &progress, + task.uid, + &swap.indexes.0, + &swap.indexes.1, + )?; } wtxn.commit()?; task.status = Status::Succeeded; @@ -1130,7 +1196,15 @@ impl IndexScheduler { } /// Swap the index `lhs` with the index `rhs`. - fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> { + fn apply_index_swap( + &self, + wtxn: &mut RwTxn, + progress: &Progress, + task_id: u32, + lhs: &str, + rhs: &str, + ) -> Result<()> { + progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks); // 1. Verify that both lhs and rhs are existing indexes let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?; if !index_lhs_exists { @@ -1148,14 +1222,21 @@ impl IndexScheduler { index_rhs_task_ids.remove_range(task_id..); // 3. before_name -> new_name in the task's KindWithContent - for task_id in &index_lhs_task_ids | &index_rhs_task_ids { + progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks); + let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids; + let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32); + progress.update_progress(task_progress); + + for task_id in tasks_to_update { let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; swap_index_uid_in_task(&mut task, (lhs, rhs)); self.all_tasks.put(wtxn, &task_id, &task)?; + atomic.fetch_add(1, Ordering::Relaxed); } // 4. 
remove the task from indexuid = before_name // 5. add the task to indexuid = after_name + progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata); self.update_index(wtxn, lhs, |lhs_tasks| { *lhs_tasks -= &index_lhs_task_ids; *lhs_tasks |= &index_rhs_task_ids; @@ -1222,6 +1303,7 @@ impl IndexScheduler { operations, mut tasks, } => { + progress.update_progress(DocumentOperationProgress::RetrievingConfig); // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. // this is made difficult by the fact we're doing private clones of the index scheduler and sending it // to a fresh thread. @@ -1277,6 +1359,7 @@ impl IndexScheduler { } }; + progress.update_progress(DocumentOperationProgress::ComputingTheChanges); let (document_changes, operation_stats, primary_key) = indexer .into_changes( &indexer_alloc, @@ -1321,6 +1404,7 @@ impl IndexScheduler { } } + progress.update_progress(DocumentOperationProgress::Indexing); if tasks.iter().any(|res| res.error.is_none()) { indexer::index( index_wtxn, @@ -1350,6 +1434,8 @@ impl IndexScheduler { Ok(tasks) } IndexOperation::DocumentEdition { index_uid, mut task } => { + progress.update_progress(DocumentEditionProgress::RetrievingConfig); + let (filter, code) = if let KindWithContent::DocumentEdition { filter_expr, context: _, @@ -1423,6 +1509,7 @@ impl IndexScheduler { }; let candidates_count = candidates.len(); + progress.update_progress(DocumentEditionProgress::ComputingTheChanges); let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone()); let document_changes = pool .install(|| { @@ -1436,6 +1523,7 @@ impl IndexScheduler { .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; let embedders = self.embedders(index_uid.clone(), embedders)?; + progress.update_progress(DocumentEditionProgress::Indexing); indexer::index( index_wtxn, index, @@ -1488,6 +1576,8 @@ impl IndexScheduler { Ok(vec![task]) } IndexOperation::DocumentDeletion { mut tasks, index_uid } => { + progress.update_progress(DocumentDeletionProgress::RetrievingConfig); + let mut to_delete = RoaringBitmap::new(); let external_documents_ids = index.external_documents_ids(); @@ -1578,6 +1668,7 @@ impl IndexScheduler { } }; + progress.update_progress(DocumentDeletionProgress::DeleteDocuments); let mut indexer = indexer::DocumentDeletion::new(); let candidates_count = to_delete.len(); indexer.delete_documents_by_docids(to_delete); @@ -1587,6 +1678,7 @@ impl IndexScheduler { .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?; let embedders = self.embedders(index_uid.clone(), embedders)?; + progress.update_progress(DocumentDeletionProgress::Indexing); indexer::index( index_wtxn, index, @@ -1615,6 +1707,7 @@ impl IndexScheduler { Ok(tasks) } IndexOperation::Settings { index_uid, settings, mut tasks } => { + progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings); let indexer_config = self.index_mapper.indexer_config(); let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config); @@ -1628,6 +1721,7 @@ impl IndexScheduler { task.status = Status::Succeeded; } + progress.update_progress(SettingsProgress::ApplyTheSettings); builder .execute( |indexing_step| tracing::debug!(update = ?indexing_step), diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index f28fa0219..479b6274f 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -119,8 +119,111 @@ 
make_enum_progress! { - DeletingBatches } +make_enum_progress! { + enum SnapshotCreationProgress: + - StartTheSnapshotCreation + - SnapshotTheIndexScheduler + - SnapshotTheUpdateFiles + - SnapshotTheIndexes + - SnapshotTheApiKeys + - CreateTheTarball +} + +make_enum_progress! { + enum DumpCreationProgress: + - StartTheDumpCreation + - DumpTheApiKeys + - DumpTheTasks + - DumpTheIndexes + - DumpTheExperimentalFeatures + - CompressTheDump +} + +make_enum_progress! { + enum CreateIndexProgress: + - CreatingTheIndex +} + +make_enum_progress! { + enum UpdateIndexProgress: + - UpdatingTheIndex +} + +make_enum_progress! { + enum DeleteIndexProgress: + - DeletingTheIndex +} + +make_enum_progress! { + enum SwappingTheIndexes: + - EnsuringCorrectnessOfTheSwap + - SwappingTheIndexes +} + +make_enum_progress! { + enum InnerSwappingTwoIndexes: + - RetrieveTheTasks + - UpdateTheTasks + - UpdateTheIndexesMetadata +} + +make_enum_progress! { + enum DocumentOperationProgress: + - RetrievingConfig + - ComputingTheChanges + - Indexing +} + +make_enum_progress! { + enum DocumentEditionProgress: + - RetrievingConfig + - ComputingTheChanges + - Indexing +} + +make_enum_progress! { + enum DocumentDeletionProgress: + - RetrievingConfig + - DeleteDocuments + - Indexing +} + +make_enum_progress! { + enum SettingsProgress: + - RetrievingAndMergingTheSettings + - ApplyTheSettings +} + make_atomic_progress!(Task alias AtomicTaskStep => "task" ); +make_atomic_progress!(Document alias AtomicDocumentStep => "document" ); make_atomic_progress!(Batch alias AtomicBatchStep => "batch" ); +make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" ); + +pub struct VariableNameStep { + name: String, + current: u32, + total: u32, +} + +impl VariableNameStep { + pub fn new(name: impl Into, current: u32, total: u32) -> Self { + Self { name: name.into(), current, total } + } +} + +impl Step for VariableNameStep { + fn name(&self) -> Cow<'static, str> { + self.name.clone().into() + } + + fn current(&self) -> u32 { + self.current + } + + fn total(&self) -> u32 { + self.total + } +} #[cfg(test)] mod test { From 1f54dfa883adf86164ecc585561c01f55cfefde8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 16:23:21 +0100 Subject: [PATCH 136/158] update the macro to look more like an enum --- crates/index-scheduler/src/processing.rs | 124 +++++++++++++---------- 1 file changed, 69 insertions(+), 55 deletions(-) diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index 479b6274f..0bc449199 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -56,11 +56,11 @@ impl ProcessingTasks { } macro_rules! make_enum_progress { - (enum $name:ident: $(- $variant:ident)+ ) => { + ($visibility:vis enum $name:ident { $($variant:ident,)+ }) => { #[repr(u8)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] #[allow(clippy::enum_variant_names)] - pub enum $name { + $visibility enum $name { $($variant),+ } @@ -100,98 +100,112 @@ macro_rules! make_atomic_progress { } make_enum_progress! { - enum BatchProgress: - - ProcessingTasks - - WritingTasksToDisk + pub enum BatchProgress { + ProcessingTasks, + WritingTasksToDisk, + } } make_enum_progress! { - enum TaskCancelationProgress: - - RetrievingTasks - - UpdatingTasks + pub enum TaskCancelationProgress { + RetrievingTasks, + UpdatingTasks, + } } make_enum_progress! 
{ - enum TaskDeletionProgress: - - DeletingTasksDateTime - - DeletingTasksMetadata - - DeletingTasks - - DeletingBatches + pub enum TaskDeletionProgress { + DeletingTasksDateTime, + DeletingTasksMetadata, + DeletingTasks, + DeletingBatches, + } } make_enum_progress! { - enum SnapshotCreationProgress: - - StartTheSnapshotCreation - - SnapshotTheIndexScheduler - - SnapshotTheUpdateFiles - - SnapshotTheIndexes - - SnapshotTheApiKeys - - CreateTheTarball + pub enum SnapshotCreationProgress { + StartTheSnapshotCreation, + SnapshotTheIndexScheduler, + SnapshotTheUpdateFiles, + SnapshotTheIndexes, + SnapshotTheApiKeys, + CreateTheTarball, + } } make_enum_progress! { - enum DumpCreationProgress: - - StartTheDumpCreation - - DumpTheApiKeys - - DumpTheTasks - - DumpTheIndexes - - DumpTheExperimentalFeatures - - CompressTheDump + pub enum DumpCreationProgress { + StartTheDumpCreation, + DumpTheApiKeys, + DumpTheTasks, + DumpTheIndexes, + DumpTheExperimentalFeatures, + CompressTheDump, + } } make_enum_progress! { - enum CreateIndexProgress: - - CreatingTheIndex + pub enum CreateIndexProgress { + CreatingTheIndex, + } } make_enum_progress! { - enum UpdateIndexProgress: - - UpdatingTheIndex + pub enum UpdateIndexProgress { + UpdatingTheIndex, + } } make_enum_progress! { - enum DeleteIndexProgress: - - DeletingTheIndex + pub enum DeleteIndexProgress { + DeletingTheIndex, + } } make_enum_progress! { - enum SwappingTheIndexes: - - EnsuringCorrectnessOfTheSwap - - SwappingTheIndexes + pub enum SwappingTheIndexes { + EnsuringCorrectnessOfTheSwap, + SwappingTheIndexes, + } } make_enum_progress! { - enum InnerSwappingTwoIndexes: - - RetrieveTheTasks - - UpdateTheTasks - - UpdateTheIndexesMetadata + pub enum InnerSwappingTwoIndexes { + RetrieveTheTasks, + UpdateTheTasks, + UpdateTheIndexesMetadata, + } } make_enum_progress! { - enum DocumentOperationProgress: - - RetrievingConfig - - ComputingTheChanges - - Indexing + pub enum DocumentOperationProgress { + RetrievingConfig, + ComputingTheChanges, + Indexing, + } } make_enum_progress! { - enum DocumentEditionProgress: - - RetrievingConfig - - ComputingTheChanges - - Indexing + pub enum DocumentEditionProgress { + RetrievingConfig, + ComputingTheChanges, + Indexing, + } } make_enum_progress! { - enum DocumentDeletionProgress: - - RetrievingConfig - - DeleteDocuments - - Indexing + pub enum DocumentDeletionProgress { + RetrievingConfig, + DeleteDocuments, + Indexing, + } } make_enum_progress! 
{ - enum SettingsProgress: - - RetrievingAndMergingTheSettings - - ApplyTheSettings + pub enum SettingsProgress { + RetrievingAndMergingTheSettings, + ApplyTheSettings, + } } make_atomic_progress!(Task alias AtomicTaskStep => "task" ); From 04a24a9239a8fbcd5e45bd2f154ef20e7ef91f59 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 11 Dec 2024 16:27:07 +0100 Subject: [PATCH 137/158] Kill Meilisearch with a TERM signal --- crates/meilisearch/src/main.rs | 5 +++ crates/xtask/src/bench/meili_process.rs | 49 +++++++++++++++++++++---- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/crates/meilisearch/src/main.rs b/crates/meilisearch/src/main.rs index b4b46bec4..6e6245c78 100644 --- a/crates/meilisearch/src/main.rs +++ b/crates/meilisearch/src/main.rs @@ -129,6 +129,11 @@ async fn try_main() -> anyhow::Result<()> { print_launch_resume(&opt, analytics.clone(), config_read_from); + tokio::spawn(async move { + tokio::signal::ctrl_c().await.unwrap(); + std::process::exit(77); + }); + run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?; Ok(()) diff --git a/crates/xtask/src/bench/meili_process.rs b/crates/xtask/src/bench/meili_process.rs index db787e595..2aff679fc 100644 --- a/crates/xtask/src/bench/meili_process.rs +++ b/crates/xtask/src/bench/meili_process.rs @@ -1,23 +1,56 @@ use std::collections::BTreeMap; +use std::time::Duration; use anyhow::{bail, Context as _}; +use tokio::process::Command; +use tokio::time; use super::assets::Asset; use super::client::Client; use super::workload::Workload; pub async fn kill(mut meilisearch: tokio::process::Child) { - if let Err(error) = meilisearch.kill().await { - tracing::warn!( - error = &error as &dyn std::error::Error, - "while terminating Meilisearch server" - ) + let Some(id) = meilisearch.id() else { return }; + + match Command::new("kill").args(["--signal=TERM", &id.to_string()]).spawn() { + Ok(mut cmd) => { + let Err(error) = cmd.wait().await else { return }; + tracing::warn!( + error = &error as &dyn std::error::Error, + "while awaiting the Meilisearch server kill" + ); + } + Err(error) => { + tracing::warn!( + error = &error as &dyn std::error::Error, + "while terminating Meilisearch server with a kill -s TERM" + ); + if let Err(error) = meilisearch.kill().await { + tracing::warn!( + error = &error as &dyn std::error::Error, + "while terminating Meilisearch server" + ) + } + return; + } + }; + + match time::timeout(Duration::from_secs(5), meilisearch.wait()).await { + Ok(_) => (), + Err(_) => { + if let Err(error) = meilisearch.kill().await { + tracing::warn!( + error = &error as &dyn std::error::Error, + "while terminating Meilisearch server" + ) + } + } } } #[tracing::instrument] pub async fn build() -> anyhow::Result<()> { - let mut command = tokio::process::Command::new("cargo"); + let mut command = Command::new("cargo"); command.arg("build").arg("--release").arg("-p").arg("meilisearch"); command.kill_on_drop(true); @@ -37,7 +70,7 @@ pub async fn start( master_key: Option<&str>, workload: &Workload, asset_folder: &str, - mut command: tokio::process::Command, + mut command: Command, ) -> anyhow::Result { command.arg("--db-path").arg("./_xtask_benchmark.ms"); if let Some(master_key) = master_key { @@ -77,7 +110,7 @@ async fn wait_for_health( return Ok(()); } - tokio::time::sleep(std::time::Duration::from_millis(500)).await; + time::sleep(Duration::from_millis(500)).await; // check whether the Meilisearch instance exited early (cut the wait) if let Some(exit_code) = 
meilisearch.try_wait().context("cannot check Meilisearch server process status")? From 9245c89cfef168fdf4f53a3424d4ed79aae756ab Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:00:46 +0100 Subject: [PATCH 138/158] move the macros to milli --- crates/index-scheduler/src/processing.rs | 49 ++--------------------- crates/milli/src/progress.rs | 51 ++++++++++++++++++++---- 2 files changed, 48 insertions(+), 52 deletions(-) diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index 0bc449199..5212433ef 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -1,6 +1,9 @@ use crate::utils::ProcessingBatch; use enum_iterator::Sequence; -use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step}; +use meilisearch_types::milli::{ + make_atomic_progress, make_enum_progress, + progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step}, +}; use roaring::RoaringBitmap; use std::{borrow::Cow, sync::Arc}; @@ -55,50 +58,6 @@ impl ProcessingTasks { } } -macro_rules! make_enum_progress { - ($visibility:vis enum $name:ident { $($variant:ident,)+ }) => { - #[repr(u8)] - #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] - #[allow(clippy::enum_variant_names)] - $visibility enum $name { - $($variant),+ - } - - impl Step for $name { - fn name(&self) -> Cow<'static, str> { - use convert_case::Casing; - - match self { - $( - $name::$variant => stringify!($variant).from_case(convert_case::Case::Camel).to_case(convert_case::Case::Lower).into() - ),+ - } - } - - fn current(&self) -> u32 { - *self as u32 - } - - fn total(&self) -> u32 { - Self::CARDINALITY as u32 - } - } - }; -} - -macro_rules! make_atomic_progress { - ($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => { - #[derive(Default, Debug, Clone, Copy)] - pub struct $struct_name {} - impl NamedStep for $struct_name { - fn name(&self) -> &'static str { - $step_name - } - } - pub type $atomic_struct_name = AtomicSubStep<$struct_name>; - }; -} - make_enum_progress! { pub enum BatchProgress { ProcessingTasks, diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index 63f0fbef8..40a943bd3 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -91,16 +91,53 @@ impl Step for AtomicSubStep { } } -#[derive(Default)] -pub struct Document {} +#[macro_export] +macro_rules! make_enum_progress { + ($visibility:vis enum $name:ident { $($variant:ident,)+ }) => { + #[repr(u8)] + #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] + #[allow(clippy::enum_variant_names)] + $visibility enum $name { + $($variant),+ + } -impl NamedStep for Document { - fn name(&self) -> &'static str { - "document" - } + impl Step for $name { + fn name(&self) -> Cow<'static, str> { + use convert_case::Casing; + + match self { + $( + $name::$variant => stringify!($variant).from_case(convert_case::Case::Camel).to_case(convert_case::Case::Lower).into() + ),+ + } + } + + fn current(&self) -> u32 { + *self as u32 + } + + fn total(&self) -> u32 { + Self::CARDINALITY as u32 + } + } + }; } -pub type AtomicDocumentStep = AtomicSubStep; +#[macro_export] +macro_rules! 
make_atomic_progress { + ($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => { + #[derive(Default, Debug, Clone, Copy)] + pub struct $struct_name {} + impl NamedStep for $struct_name { + fn name(&self) -> &'static str { + $step_name + } + } + pub type $atomic_struct_name = AtomicSubStep<$struct_name>; + }; +} + +make_atomic_progress!(Document alias AtomicDocumentStep => "document" ); #[derive(Debug, Serialize, Clone)] pub struct ProgressView { From c5536c37b59e0efaa6dcd7bc04d07b7bd8696f3c Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:03:06 +0100 Subject: [PATCH 139/158] rename the atomic::name to unit_name --- crates/milli/src/progress.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index 40a943bd3..6a4231e91 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -65,7 +65,7 @@ pub trait NamedStep: 'static + Send + Sync + Default { /// - The name of the step doesn't change /// - The total number of steps doesn't change pub struct AtomicSubStep { - name: Name, + unit_name: Name, current: Arc, total: u32, } @@ -73,13 +73,13 @@ pub struct AtomicSubStep { impl AtomicSubStep { pub fn new(total: u32) -> (Arc, Self) { let current = Arc::new(AtomicU32::new(0)); - (current.clone(), Self { current, total, name: Name::default() }) + (current.clone(), Self { current, total, unit_name: Name::default() }) } } impl Step for AtomicSubStep { fn name(&self) -> Cow<'static, str> { - self.name.name().into() + self.unit_name.name().into() } fn current(&self) -> u32 { From 85577e70cd47f39a2b891d96f96cc3467ce6d1ae Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:05:34 +0100 Subject: [PATCH 140/158] reuse the enqueued --- crates/index-scheduler/src/batch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index 733984043..d05af31c3 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -695,7 +695,7 @@ impl IndexScheduler { let (atomic, update_file_progress) = AtomicUpdateFileStep::new(enqueued.len() as u32); progress.update_progress(update_file_progress); - for task_id in self.get_status(&rtxn, Status::Enqueued)? { + for task_id in enqueued { let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?; if let Some(content_uuid) = task.content_uuid() { let src = self.file_store.get_update_path(content_uuid); From f1beb60204e32800c00c47eb86e23fee2082edd8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:07:45 +0100 Subject: [PATCH 141/158] make the progress use payload instead of documents --- crates/milli/src/progress.rs | 1 + crates/milli/src/update/new/indexer/document_operation.rs | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index 6a4231e91..8243ec235 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -138,6 +138,7 @@ macro_rules! 
make_atomic_progress { } make_atomic_progress!(Document alias AtomicDocumentStep => "document" ); +make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" ); #[derive(Debug, Serialize, Clone)] pub struct ProgressView { diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs index 4418944db..a1fc31f61 100644 --- a/crates/milli/src/update/new/indexer/document_operation.rs +++ b/crates/milli/src/update/new/indexer/document_operation.rs @@ -15,7 +15,7 @@ use super::super::document_change::DocumentChange; use super::document_changes::{DocumentChangeContext, DocumentChanges}; use super::retrieve_or_guess_primary_key; use crate::documents::PrimaryKey; -use crate::progress::{AtomicDocumentStep, Progress}; +use crate::progress::{AtomicDocumentStep, AtomicPayloadStep, Progress}; use crate::update::new::document::Versions; use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::MostlySend; @@ -71,7 +71,7 @@ impl<'pl> DocumentOperation<'pl> { let mut primary_key = None; let payload_count = operations.len(); - let (step, progress_step) = AtomicDocumentStep::new(payload_count as u32); + let (step, progress_step) = AtomicPayloadStep::new(payload_count as u32); progress.update_progress(progress_step); for (payload_index, operation) in operations.into_iter().enumerate() { From 5d682b4700789bdb91215d83622aa3e1d4c062c1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:08:45 +0100 Subject: [PATCH 142/158] rename the ComputingTheChanges to ComputingDocumentChanges --- crates/index-scheduler/src/batch.rs | 2 +- crates/index-scheduler/src/processing.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index d05af31c3..9ad43f192 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -1359,7 +1359,7 @@ impl IndexScheduler { } }; - progress.update_progress(DocumentOperationProgress::ComputingTheChanges); + progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges); let (document_changes, operation_stats, primary_key) = indexer .into_changes( &indexer_alloc, diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index 5212433ef..89bec97e9 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -139,7 +139,7 @@ make_enum_progress! { make_enum_progress! 
{ pub enum DocumentOperationProgress { RetrievingConfig, - ComputingTheChanges, + ComputingDocumentChanges, Indexing, } } From ad4dc7072028d4361d77ac682dfa61afc80a20f7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:09:54 +0100 Subject: [PATCH 143/158] rename the ComputingTheChanges to ComputingDocumentChanges in the edit document progress --- crates/index-scheduler/src/batch.rs | 2 +- crates/index-scheduler/src/processing.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs index 9ad43f192..a40eac02c 100644 --- a/crates/index-scheduler/src/batch.rs +++ b/crates/index-scheduler/src/batch.rs @@ -1509,7 +1509,7 @@ impl IndexScheduler { }; let candidates_count = candidates.len(); - progress.update_progress(DocumentEditionProgress::ComputingTheChanges); + progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges); let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone()); let document_changes = pool .install(|| { diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index 89bec97e9..57d90a40b 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -147,7 +147,7 @@ make_enum_progress! { make_enum_progress! { pub enum DocumentEditionProgress { RetrievingConfig, - ComputingTheChanges, + ComputingDocumentChanges, Indexing, } } From 29fc77ee5b81c9768d5da72ffa228384c79fe545 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:11:19 +0100 Subject: [PATCH 144/158] remove usuless print --- crates/index-scheduler/src/utils.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs index 3718c69ca..1fcedfddf 100644 --- a/crates/index-scheduler/src/utils.rs +++ b/crates/index-scheduler/src/utils.rs @@ -276,7 +276,6 @@ impl IndexScheduler { .map(|batch_id| { if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) { let mut batch = processing.batch.as_ref().unwrap().to_batch(); - println!("here with progress: {}", processing.progress.is_some()); batch.progress = processing.get_progress_view(); Ok(batch) } else { From fa885e75b42a2312c6efcdcd315e61daf5bb622f Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:13:12 +0100 Subject: [PATCH 145/158] rename the send_progress in progress --- crates/milli/src/update/new/indexer/document_changes.rs | 6 +++--- crates/milli/src/update/new/indexer/mod.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/milli/src/update/new/indexer/document_changes.rs b/crates/milli/src/update/new/indexer/document_changes.rs index 3e2b9c036..a45fcee85 100644 --- a/crates/milli/src/update/new/indexer/document_changes.rs +++ b/crates/milli/src/update/new/indexer/document_changes.rs @@ -206,7 +206,7 @@ pub fn extract< doc_allocs, fields_ids_map_store, must_stop_processing, - progress: send_progress, + progress, }: IndexingContext<'fid, 'indexer, 'index, MSP>, extractor_allocs: &'extractor mut ThreadLocal>, datastore: &'data ThreadLocal, @@ -217,7 +217,7 @@ where MSP: Fn() -> bool + Sync, { tracing::trace!("We are resetting the extractor allocators"); - send_progress.update_progress(step); + progress.update_progress(step); // Clean up and reuse the extractor allocs for extractor_alloc in extractor_allocs.iter_mut() { tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes()); @@ -226,7 +226,7 @@ where let 
total_documents = document_changes.len() as u32; let (step, progress_step) = AtomicDocumentStep::new(total_documents); - send_progress.update_progress(progress_step); + progress.update_progress(progress_step); let pi = document_changes.iter(CHUNK_SIZE); pi.try_arc_for_each_try_init( diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index acdf78304..a850c0d03 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -72,7 +72,7 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP>( document_changes: &DC, embedders: EmbeddingConfigs, must_stop_processing: &'indexer MSP, - send_progress: &'indexer Progress, + progress: &'indexer Progress, ) -> Result<()> where DC: DocumentChanges<'pl>, @@ -125,7 +125,7 @@ where doc_allocs: &doc_allocs, fields_ids_map_store: &fields_ids_map_store, must_stop_processing, - progress: send_progress, + progress, }; let mut index_embeddings = index.embedding_configs(wtxn)?; From 45d5d4bf40450b9010ef2b935393c60b0068c4e0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:15:33 +0100 Subject: [PATCH 146/158] make the progressview public --- crates/milli/src/progress.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index 8243ec235..3c7a35c89 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -141,14 +141,16 @@ make_atomic_progress!(Document alias AtomicDocumentStep => "document" ); make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" ); #[derive(Debug, Serialize, Clone)] +#[serde(rename_all = "camelCase")] pub struct ProgressView { - steps: Vec, - percentage: f32, + pub steps: Vec, + pub percentage: f32, } #[derive(Debug, Serialize, Clone)] +#[serde(rename_all = "camelCase")] pub struct ProgressStepView { - name: Cow<'static, str>, - finished: u32, - total: u32, + pub name: Cow<'static, str>, + pub finished: u32, + pub total: u32, } From ab9213fa942b1037478fd3257b618faee01d22bc Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:16:20 +0100 Subject: [PATCH 147/158] ensure we never write the progress to the db --- crates/meilisearch-types/src/batches.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/meilisearch-types/src/batches.rs b/crates/meilisearch-types/src/batches.rs index 57c609320..34af21f60 100644 --- a/crates/meilisearch-types/src/batches.rs +++ b/crates/meilisearch-types/src/batches.rs @@ -16,7 +16,7 @@ pub type BatchId = u32; pub struct Batch { pub uid: BatchId, - #[serde(skip_deserializing)] + #[serde(skip)] pub progress: Option, pub details: DetailsView, pub stats: BatchStats, From 75d5cea62470c17a3341c40f0eeefbcf81590f9d Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:17:33 +0100 Subject: [PATCH 148/158] use a with_capacity while allocating the progress view --- crates/milli/src/progress.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index 3c7a35c89..96483ebd0 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -37,7 +37,7 @@ impl Progress { let mut percentage = 0.0; let mut prev_factors = 1.0; - let mut step_view = Vec::new(); + let mut step_view = Vec::with_capacity(steps.len()); for (_, step) in steps.iter() { prev_factors *= step.total() as f32; percentage += step.current() as f32 / prev_factors; From 08fd026ebdc8638ff283e8b301346f6d92219530 Mon Sep 17 
00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:18:13 +0100 Subject: [PATCH 149/158] fix warning --- crates/milli/src/update/new/indexer/document_operation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs index a1fc31f61..090c1eb8e 100644 --- a/crates/milli/src/update/new/indexer/document_operation.rs +++ b/crates/milli/src/update/new/indexer/document_operation.rs @@ -15,7 +15,7 @@ use super::super::document_change::DocumentChange; use super::document_changes::{DocumentChangeContext, DocumentChanges}; use super::retrieve_or_guess_primary_key; use crate::documents::PrimaryKey; -use crate::progress::{AtomicDocumentStep, AtomicPayloadStep, Progress}; +use crate::progress::{AtomicPayloadStep, Progress}; use crate::update::new::document::Versions; use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::MostlySend; From 8cd3a1aa571f7a1489dc84ffdad6ce790279cba1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:18:40 +0100 Subject: [PATCH 150/158] fmt --- crates/index-scheduler/src/error.rs | 3 ++- .../index-scheduler/src/index_mapper/mod.rs | 9 ++++---- crates/index-scheduler/src/processing.rs | 13 ++++++----- crates/meilisearch-types/src/batch_view.rs | 8 +++---- crates/meilisearch-types/src/batches.rs | 6 ++--- crates/meilisearch/src/routes/batches.rs | 22 +++++++++---------- crates/meilitool/src/upgrade/mod.rs | 1 - crates/meilitool/src/upgrade/v1_10.rs | 17 +++++--------- crates/meilitool/src/upgrade/v1_11.rs | 10 ++++----- crates/meilitool/src/upgrade/v1_12.rs | 3 ++- crates/milli/src/progress.rs | 12 ++++------ 11 files changed, 47 insertions(+), 57 deletions(-) diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index 5fb04828c..69da70a7e 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -1,12 +1,13 @@ use std::fmt::Display; -use crate::TaskId; use meilisearch_types::batches::BatchId; use meilisearch_types::error::{Code, ErrorCode}; use meilisearch_types::tasks::{Kind, Status}; use meilisearch_types::{heed, milli}; use thiserror::Error; +use crate::TaskId; + #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub enum DateField { BeforeEnqueuedAt, diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index 8b9ef3597..2f5b176ed 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -3,10 +3,6 @@ use std::sync::{Arc, RwLock}; use std::time::Duration; use std::{fs, thread}; -use self::index_map::IndexMap; -use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; -use crate::uuid_codec::UuidCodec; -use crate::{Error, Result}; use meilisearch_types::heed::types::{SerdeJson, Str}; use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn}; use meilisearch_types::milli; @@ -17,6 +13,11 @@ use time::OffsetDateTime; use tracing::error; use uuid::Uuid; +use self::index_map::IndexMap; +use self::IndexStatus::{Available, BeingDeleted, Closing, Missing}; +use crate::uuid_codec::UuidCodec; +use crate::{Error, Result}; + mod index_map; const INDEX_MAPPING: &str = "index-mapping"; diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index 57d90a40b..74802831e 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -1,11 +1,12 @@ -use 
crate::utils::ProcessingBatch; +use std::borrow::Cow; +use std::sync::Arc; + use enum_iterator::Sequence; -use meilisearch_types::milli::{ - make_atomic_progress, make_enum_progress, - progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step}, -}; +use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step}; +use meilisearch_types::milli::{make_atomic_progress, make_enum_progress}; use roaring::RoaringBitmap; -use std::{borrow::Cow, sync::Arc}; + +use crate::utils::ProcessingBatch; #[derive(Clone)] pub struct ProcessingTasks { diff --git a/crates/meilisearch-types/src/batch_view.rs b/crates/meilisearch-types/src/batch_view.rs index a3d7f834f..08d25413c 100644 --- a/crates/meilisearch-types/src/batch_view.rs +++ b/crates/meilisearch-types/src/batch_view.rs @@ -2,11 +2,9 @@ use milli::progress::ProgressView; use serde::Serialize; use time::{Duration, OffsetDateTime}; -use crate::{ - batches::{Batch, BatchId, BatchStats}, - task_view::DetailsView, - tasks::serialize_duration, -}; +use crate::batches::{Batch, BatchId, BatchStats}; +use crate::task_view::DetailsView; +use crate::tasks::serialize_duration; #[derive(Debug, Clone, Serialize)] #[serde(rename_all = "camelCase")] diff --git a/crates/meilisearch-types/src/batches.rs b/crates/meilisearch-types/src/batches.rs index 34af21f60..664dafa7a 100644 --- a/crates/meilisearch-types/src/batches.rs +++ b/crates/meilisearch-types/src/batches.rs @@ -4,10 +4,8 @@ use milli::progress::ProgressView; use serde::{Deserialize, Serialize}; use time::OffsetDateTime; -use crate::{ - task_view::DetailsView, - tasks::{Kind, Status}, -}; +use crate::task_view::DetailsView; +use crate::tasks::{Kind, Status}; pub type BatchId = u32; diff --git a/crates/meilisearch/src/routes/batches.rs b/crates/meilisearch/src/routes/batches.rs index 6faedc021..4d42cdd16 100644 --- a/crates/meilisearch/src/routes/batches.rs +++ b/crates/meilisearch/src/routes/batches.rs @@ -1,18 +1,18 @@ -use actix_web::{ - web::{self, Data}, - HttpResponse, -}; +use actix_web::web::{self, Data}; +use actix_web::HttpResponse; use deserr::actix_web::AwebQueryParameter; use index_scheduler::{IndexScheduler, Query}; -use meilisearch_types::{ - batch_view::BatchView, batches::BatchId, deserr::DeserrQueryParamError, error::ResponseError, - keys::actions, -}; +use meilisearch_types::batch_view::BatchView; +use meilisearch_types::batches::BatchId; +use meilisearch_types::deserr::DeserrQueryParamError; +use meilisearch_types::error::ResponseError; +use meilisearch_types::keys::actions; use serde::Serialize; -use crate::extractors::{authentication::GuardedData, sequential_extractor::SeqHandler}; - -use super::{tasks::TasksFilterQuery, ActionPolicy}; +use super::tasks::TasksFilterQuery; +use super::ActionPolicy; +use crate::extractors::authentication::GuardedData; +use crate::extractors::sequential_extractor::SeqHandler; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("").route(web::get().to(SeqHandler(get_batches)))) diff --git a/crates/meilitool/src/upgrade/mod.rs b/crates/meilitool/src/upgrade/mod.rs index 50882f610..14f941311 100644 --- a/crates/meilitool/src/upgrade/mod.rs +++ b/crates/meilitool/src/upgrade/mod.rs @@ -7,7 +7,6 @@ use std::path::{Path, PathBuf}; use anyhow::{bail, Context}; use meilisearch_types::versioning::create_version_file; - use v1_10::v1_9_to_v1_10; use v1_12::v1_11_to_v1_12; diff --git a/crates/meilitool/src/upgrade/v1_10.rs b/crates/meilitool/src/upgrade/v1_10.rs index 2efc1773c..4a49ea471 100644 --- 
a/crates/meilitool/src/upgrade/v1_10.rs +++ b/crates/meilitool/src/upgrade/v1_10.rs @@ -1,18 +1,13 @@ -use anyhow::bail; use std::path::Path; -use anyhow::Context; -use meilisearch_types::{ - heed::{ - types::{SerdeJson, Str}, - Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified, - }, - milli::index::{db_name, main_key}, -}; - -use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; +use anyhow::{bail, Context}; +use meilisearch_types::heed::types::{SerdeJson, Str}; +use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified}; +use meilisearch_types::milli::index::{db_name, main_key}; use super::v1_9; +use crate::uuid_codec::UuidCodec; +use crate::{try_opening_database, try_opening_poly_database}; pub type FieldDistribution = std::collections::BTreeMap; diff --git a/crates/meilitool/src/upgrade/v1_11.rs b/crates/meilitool/src/upgrade/v1_11.rs index 0c84d3842..92d853dd0 100644 --- a/crates/meilitool/src/upgrade/v1_11.rs +++ b/crates/meilitool/src/upgrade/v1_11.rs @@ -7,12 +7,12 @@ use std::path::Path; use anyhow::Context; -use meilisearch_types::{ - heed::{types::Str, Database, EnvOpenOptions}, - milli::index::db_name, -}; +use meilisearch_types::heed::types::Str; +use meilisearch_types::heed::{Database, EnvOpenOptions}; +use meilisearch_types::milli::index::db_name; -use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec}; +use crate::uuid_codec::UuidCodec; +use crate::{try_opening_database, try_opening_poly_database}; pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> { println!("Upgrading from v1.10.0 to v1.11.0"); diff --git a/crates/meilitool/src/upgrade/v1_12.rs b/crates/meilitool/src/upgrade/v1_12.rs index 85fb41472..444617375 100644 --- a/crates/meilitool/src/upgrade/v1_12.rs +++ b/crates/meilitool/src/upgrade/v1_12.rs @@ -1,7 +1,8 @@ //! The breaking changes that happened between the v1.11 and the v1.12 are: //! - The new indexer changed the update files format from OBKV to ndjson. 
https://github.com/meilisearch/meilisearch/pull/4900 -use std::{io::BufWriter, path::Path}; +use std::io::BufWriter; +use std::path::Path; use anyhow::Context; use file_store::FileStore; diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index 96483ebd0..d50be43cb 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -1,11 +1,7 @@ -use std::{ - any::TypeId, - borrow::Cow, - sync::{ - atomic::{AtomicU32, Ordering}, - Arc, RwLock, - }, -}; +use std::any::TypeId; +use std::borrow::Cow; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, RwLock}; use serde::Serialize; From d12364c1e0a22246652db2497f813031769adb76 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:30:48 +0100 Subject: [PATCH 151/158] fix the tests --- crates/index-scheduler/src/lib.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index d3e65c6f8..f5f73087d 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -4308,16 +4308,6 @@ mod tests { snapshot!(batch, @r#" { "uid": 0, - "progress": { - "steps": [ - { - "name": "processing tasks", - "finished": 0, - "total": 2 - } - ], - "percentage": 0.0 - }, "details": { "primaryKey": "mouse" }, From 0d0c18f519e44ab30d1b4d91dc2cd0f0b63d9275 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 11 Dec 2024 18:41:03 +0100 Subject: [PATCH 152/158] rename the Step::name into Step::current_step --- crates/index-scheduler/src/processing.rs | 18 +++++++++--------- crates/milli/src/progress.rs | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/crates/index-scheduler/src/processing.rs b/crates/index-scheduler/src/processing.rs index 74802831e..aca654de9 100644 --- a/crates/index-scheduler/src/processing.rs +++ b/crates/index-scheduler/src/processing.rs @@ -215,7 +215,7 @@ mod test { { "steps": [ { - "name": "processing tasks", + "currentStep": "processing tasks", "finished": 0, "total": 2 } @@ -228,7 +228,7 @@ mod test { { "steps": [ { - "name": "writing tasks to disk", + "currentStep": "writing tasks to disk", "finished": 1, "total": 2 } @@ -248,12 +248,12 @@ mod test { { "steps": [ { - "name": "processing tasks", + "currentStep": "processing tasks", "finished": 0, "total": 2 }, { - "name": "task", + "currentStep": "task", "finished": 0, "total": 10 } @@ -266,12 +266,12 @@ mod test { { "steps": [ { - "name": "processing tasks", + "currentStep": "processing tasks", "finished": 0, "total": 2 }, { - "name": "task", + "currentStep": "task", "finished": 6, "total": 10 } @@ -284,7 +284,7 @@ mod test { { "steps": [ { - "name": "writing tasks to disk", + "currentStep": "writing tasks to disk", "finished": 1, "total": 2 } @@ -299,12 +299,12 @@ mod test { { "steps": [ { - "name": "writing tasks to disk", + "currentStep": "writing tasks to disk", "finished": 1, "total": 2 }, { - "name": "task", + "currentStep": "task", "finished": 4, "total": 5 } diff --git a/crates/milli/src/progress.rs b/crates/milli/src/progress.rs index d50be43cb..accc2cf56 100644 --- a/crates/milli/src/progress.rs +++ b/crates/milli/src/progress.rs @@ -39,7 +39,7 @@ impl Progress { percentage += step.current() as f32 / prev_factors; step_view.push(ProgressStepView { - name: step.name(), + current_step: step.name(), finished: step.current(), total: step.total(), }); @@ -146,7 +146,7 @@ pub struct ProgressView { #[derive(Debug, Serialize, Clone)] #[serde(rename_all = "camelCase")] pub struct ProgressStepView { - pub name: 
Cow<'static, str>, + pub current_step: Cow<'static, str>, pub finished: u32, pub total: u32, } From 1fdfa3f20885abe5d6dcd95eda0f7e4b2678cdd1 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 12 Dec 2024 09:26:14 +0100 Subject: [PATCH 153/158] Change the exit code to 130 when Ctrl-Ced --- crates/meilisearch/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/meilisearch/src/main.rs b/crates/meilisearch/src/main.rs index 6e6245c78..ee3bbf430 100644 --- a/crates/meilisearch/src/main.rs +++ b/crates/meilisearch/src/main.rs @@ -131,7 +131,7 @@ async fn try_main() -> anyhow::Result<()> { tokio::spawn(async move { tokio::signal::ctrl_c().await.unwrap(); - std::process::exit(77); + std::process::exit(130); }); run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?; From 6c72559457366da88acf191e1844cb1d353b5127 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 12 Dec 2024 09:27:10 +0100 Subject: [PATCH 154/158] Update the binary-path description Co-authored-by: Louis Dureuil --- crates/xtask/src/bench/mod.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/xtask/src/bench/mod.rs b/crates/xtask/src/bench/mod.rs index 491dc33ab..1416c21d9 100644 --- a/crates/xtask/src/bench/mod.rs +++ b/crates/xtask/src/bench/mod.rs @@ -87,7 +87,9 @@ pub struct BenchDeriveArgs { #[arg(long, default_value_t = 60)] tasks_queue_timeout_secs: u64, - /// The path to the binary to run. By default it compiles the binary with cargo. + /// The path to the binary to run. + /// + /// If unspecified, runs `cargo run` after building Meilisearch with `cargo build`. #[arg(long)] binary_path: Option, } From 18ce95dcbf5c8c8ae7527887ae2abf3cb2b1c7a7 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 12 Dec 2024 14:56:45 +0100 Subject: [PATCH 155/158] Add test reproducing the bug --- .../meilisearch/tests/search/facet_search.rs | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 19224c3df..696c23f91 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -57,6 +57,116 @@ async fn simple_facet_search() { assert_eq!(response["facetHits"].as_array().unwrap().len(), 1); } +#[actix_rt::test] +async fn simple_facet_search_on_movies() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = json!([ + { + "id": 1, + "title": "Carol", + "genres": [ + "Romance", + "Drama" + ], + "color": [ + "red" + ], + "platforms": [ + "MacOS", + "Linux", + "Windows" + ] + }, + { + "id": 2, + "title": "Wonder Woman", + "genres": [ + "Action", + "Adventure" + ], + "color": [ + "green" + ], + "platforms": [ + "MacOS" + ] + }, + { + "id": 3, + "title": "Life of Pi", + "genres": [ + "Adventure", + "Drama" + ], + "color": [ + "blue" + ], + "platforms": [ + "Windows" + ] + }, + { + "id": 4, + "title": "Mad Max: Fury Road", + "genres": [ + "Adventure", + "Science Fiction" + ], + "color": [ + "red" + ], + "platforms": [ + "MacOS", + "Linux" + ] + }, + { + "id": 5, + "title": "Moana", + "genres": [ + "Fantasy", + "Action" + ], + "color": [ + "red" + ], + "platforms": [ + "Windows" + ] + }, + { + "id": 6, + "title": "Philadelphia", + "genres": [ + "Drama" + ], + "color": [ + "blue" + ], + "platforms": [ + "MacOS", + "Linux", + "Windows" + ] + } + ]); + let (response, code) = + 
index.update_settings_filterable_attributes(json!(["genres", "color"])).await; + assert_eq!(202, code, "{:?}", response); + index.wait_task(response.uid()).await; + + let (response, _code) = index.add_documents(documents, None).await; + index.wait_task(response.uid()).await; + + let (response, code) = + index.facet_search(json!({"facetQuery": "", "facetName": "genres", "q": "" })).await; + + assert_eq!(code, 200, "{}", response); + snapshot!(response["facetHits"], @r###"[{"value":"Action","count":2},{"value":"Adventure","count":3},{"value":"Drama","count":3},{"value":"Fantasy","count":1},{"value":"Romance","count":1},{"value":"Science Fiction","count":1}]"###); +} + #[actix_rt::test] async fn advanced_facet_search() { let server = Server::new().await; From 961de4d34ea3821ba24df8c376b6e6cf0d5a307a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 12 Dec 2024 14:56:59 +0100 Subject: [PATCH 156/158] Fix facet fst --- .../milli/src/update/new/facet_search_builder.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/crates/milli/src/update/new/facet_search_builder.rs b/crates/milli/src/update/new/facet_search_builder.rs index 1993c1d00..d1ff6096d 100644 --- a/crates/milli/src/update/new/facet_search_builder.rs +++ b/crates/milli/src/update/new/facet_search_builder.rs @@ -103,6 +103,8 @@ impl<'indexer> FacetSearchBuilder<'indexer> { #[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_fst")] pub fn merge_and_write(self, index: &Index, wtxn: &mut RwTxn, rtxn: &RoTxn) -> Result<()> { + tracing::trace!("merge facet strings for facet search: {:?}", self.registered_facets); + let reader = self.normalized_facet_string_docids_sorter.into_reader_cursors()?; let mut builder = grenad::MergerBuilder::new(MergeDeladdBtreesetString); builder.extend(reader); @@ -118,12 +120,15 @@ impl<'indexer> FacetSearchBuilder<'indexer> { BEU16StrCodec::bytes_decode(key).map_err(heed::Error::Encoding)?; if current_field_id != Some(field_id) { - if let Some(fst_merger_builder) = fst_merger_builder { + if let (Some(current_field_id), Some(fst_merger_builder)) = + (current_field_id, fst_merger_builder) + { let mmap = fst_merger_builder.build(&mut callback)?; - index - .facet_id_string_fst - .remap_data_type::() - .put(wtxn, &field_id, &mmap)?; + index.facet_id_string_fst.remap_data_type::().put( + wtxn, + ¤t_field_id, + &mmap, + )?; } fst = index.facet_id_string_fst.get(rtxn, &field_id)?; From 2f3cc8cdd2505fc9ba9b6bc435ca822101a54542 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 12 Dec 2024 16:15:37 +0100 Subject: [PATCH 157/158] Fix the merge_caches_sorted function --- crates/milli/src/update/new/extract/cache.rs | 57 +++++++++----------- 1 file changed, 26 insertions(+), 31 deletions(-) diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index 09ca60211..62c00d2b1 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -477,21 +477,16 @@ where F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>, { let mut maps = Vec::new(); - let mut readers = Vec::new(); - let mut current_bucket = None; - for FrozenCache { bucket, cache, ref mut spilled } in frozen { - assert_eq!(*current_bucket.get_or_insert(bucket), bucket); - maps.push(cache); - readers.append(spilled); - } - - // First manage the spilled entries by looking into the HashMaps, - // merge them and mark them as dummy. 
let mut heap = BinaryHeap::new(); - for (source_index, source) in readers.into_iter().enumerate() { - let mut cursor = source.into_cursor()?; - if cursor.move_on_next()?.is_some() { - heap.push(Entry { cursor, source_index }); + let mut current_bucket = None; + for FrozenCache { bucket, cache, spilled } in frozen { + assert_eq!(*current_bucket.get_or_insert(bucket), bucket); + maps.push((bucket, cache)); + for reader in spilled { + let mut cursor = reader.into_cursor()?; + if cursor.move_on_next()?.is_some() { + heap.push(Entry { cursor, bucket }); + } } } @@ -508,25 +503,25 @@ where let mut output = DelAddRoaringBitmap::from_bytes(first_value)?; while let Some(mut entry) = heap.peek_mut() { - if let Some((key, _value)) = entry.cursor.current() { - if first_key == key { - let new = DelAddRoaringBitmap::from_bytes(first_value)?; - output = output.merge(new); - // When we are done we the current value of this entry move make - // it move forward and let the heap reorganize itself (on drop) - if entry.cursor.move_on_next()?.is_none() { - PeekMut::pop(entry); - } - } else { + if let Some((key, value)) = entry.cursor.current() { + if first_key != key { break; } + + let new = DelAddRoaringBitmap::from_bytes(value)?; + output = output.merge(new); + // When we are done we the current value of this entry move make + // it move forward and let the heap reorganize itself (on drop) + if entry.cursor.move_on_next()?.is_none() { + PeekMut::pop(entry); + } } } // Once we merged all of the spilled bitmaps we must also // fetch the entries from the non-spilled entries (the HashMaps). - for (map_index, map) in maps.iter_mut().enumerate() { - if first_entry.source_index != map_index { + for (map_bucket, map) in maps.iter_mut() { + if first_entry.bucket != *map_bucket { if let Some(new) = map.get_mut(first_key) { output.union_and_clear_bbbul(new); } @@ -538,12 +533,12 @@ where // Don't forget to put the first entry back into the heap. if first_entry.cursor.move_on_next()?.is_some() { - heap.push(first_entry) + heap.push(first_entry); } } // Then manage the content on the HashMap entries that weren't taken (mem::take). 
- while let Some(mut map) = maps.pop() { + while let Some((_, mut map)) = maps.pop() { // Make sure we don't try to work with entries already managed by the spilled let mut ordered_entries: Vec<_> = map.iter_mut().filter(|(_, bbbul)| !bbbul.is_empty()).collect(); @@ -553,7 +548,7 @@ where let mut output = DelAddRoaringBitmap::empty(); output.union_and_clear_bbbul(bbbul); - for rhs in maps.iter_mut() { + for (_, rhs) in maps.iter_mut() { if let Some(new) = rhs.get_mut(key) { output.union_and_clear_bbbul(new); } @@ -569,14 +564,14 @@ where struct Entry { cursor: ReaderCursor, - source_index: usize, + bucket: usize, } impl Ord for Entry { fn cmp(&self, other: &Entry) -> Ordering { let skey = self.cursor.current().map(|(k, _)| k); let okey = other.cursor.current().map(|(k, _)| k); - skey.cmp(&okey).then(self.source_index.cmp(&other.source_index)).reverse() + skey.cmp(&okey).then(self.bucket.cmp(&other.bucket)).reverse() } } From acdd5aa6ea143b2b92079e50cc0e22afeebee570 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 12 Dec 2024 17:54:28 +0100 Subject: [PATCH 158/158] Use the thread source id instead of the destination id when filtering on the cache to merge --- crates/milli/src/update/new/extract/cache.rs | 44 ++++++++++++-------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/crates/milli/src/update/new/extract/cache.rs b/crates/milli/src/update/new/extract/cache.rs index 62c00d2b1..e2c8bb5fe 100644 --- a/crates/milli/src/update/new/extract/cache.rs +++ b/crates/milli/src/update/new/extract/cache.rs @@ -177,12 +177,12 @@ impl<'extractor> BalancedCaches<'extractor> { Ok(()) } - pub fn freeze(&mut self) -> Result>> { + pub fn freeze(&mut self, source_id: usize) -> Result>> { match &mut self.caches { InnerCaches::Normal(NormalCaches { caches }) => caches .iter_mut() .enumerate() - .map(|(bucket, map)| { + .map(|(bucket_id, map)| { // safety: we are transmuting the Bbbul into a FrozenBbbul // that are the same size. let map = unsafe { @@ -201,14 +201,19 @@ impl<'extractor> BalancedCaches<'extractor> { >, >(map) }; - Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled: Vec::new() }) + Ok(FrozenCache { + source_id, + bucket_id, + cache: FrozenMap::new(map), + spilled: Vec::new(), + }) }) .collect(), InnerCaches::Spilling(SpillingCaches { caches, spilled_entries, .. }) => caches .iter_mut() .zip(mem::take(spilled_entries)) .enumerate() - .map(|(bucket, (map, sorter))| { + .map(|(bucket_id, (map, sorter))| { let spilled = sorter .into_reader_cursors()? .into_iter() @@ -234,7 +239,7 @@ impl<'extractor> BalancedCaches<'extractor> { >, >(map) }; - Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled }) + Ok(FrozenCache { source_id, bucket_id, cache: FrozenMap::new(map), spilled }) }) .collect(), } @@ -440,7 +445,8 @@ fn spill_entry_to_sorter( } pub struct FrozenCache<'a, 'extractor> { - bucket: usize, + bucket_id: usize, + source_id: usize, cache: FrozenMap< 'a, 'extractor, @@ -457,9 +463,9 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>( let width = caches.first().map(BalancedCaches::buckets).unwrap_or(0); let mut bucket_caches: Vec<_> = iter::repeat_with(Vec::new).take(width).collect(); - for thread_cache in caches { - for frozen in thread_cache.freeze()? { - bucket_caches[frozen.bucket].push(frozen); + for (thread_index, thread_cache) in caches.iter_mut().enumerate() { + for frozen in thread_cache.freeze(thread_index)? 
{ + bucket_caches[frozen.bucket_id].push(frozen); } } @@ -479,13 +485,13 @@ where let mut maps = Vec::new(); let mut heap = BinaryHeap::new(); let mut current_bucket = None; - for FrozenCache { bucket, cache, spilled } in frozen { - assert_eq!(*current_bucket.get_or_insert(bucket), bucket); - maps.push((bucket, cache)); + for FrozenCache { source_id, bucket_id, cache, spilled } in frozen { + assert_eq!(*current_bucket.get_or_insert(bucket_id), bucket_id); + maps.push((source_id, cache)); for reader in spilled { let mut cursor = reader.into_cursor()?; if cursor.move_on_next()?.is_some() { - heap.push(Entry { cursor, bucket }); + heap.push(Entry { cursor, source_id }); } } } @@ -520,8 +526,12 @@ where // Once we merged all of the spilled bitmaps we must also // fetch the entries from the non-spilled entries (the HashMaps). - for (map_bucket, map) in maps.iter_mut() { - if first_entry.bucket != *map_bucket { + for (source_id, map) in maps.iter_mut() { + debug_assert!( + !(map.get(first_key).is_some() && first_entry.source_id == *source_id), + "A thread should not have spiled a key that has been inserted in the cache" + ); + if first_entry.source_id != *source_id { if let Some(new) = map.get_mut(first_key) { output.union_and_clear_bbbul(new); } @@ -564,14 +574,14 @@ where struct Entry { cursor: ReaderCursor, - bucket: usize, + source_id: usize, } impl Ord for Entry { fn cmp(&self, other: &Entry) -> Ordering { let skey = self.cursor.current().map(|(k, _)| k); let okey = other.cursor.current().map(|(k, _)| k); - skey.cmp(&okey).then(self.bucket.cmp(&other.bucket)).reverse() + skey.cmp(&okey).then(self.source_id.cmp(&other.source_id)).reverse() } }