From 9df8cfc013452ecb5935d5501c96a4c465183a5d Mon Sep 17 00:00:00 2001
From: Paul Sanders <psanders1@gmail.com>
Date: Mon, 18 Dec 2023 13:05:46 -0500
Subject: [PATCH 01/41] Add libstdc++ in Dockerfile

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Dockerfile b/Dockerfile
index 2cc1dcaec..f6be39785 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,7 +26,7 @@ ENV     MEILI_HTTP_ADDR 0.0.0.0:7700
 ENV     MEILI_SERVER_PROVIDER docker
 
 RUN     apk update --quiet \
-        && apk add -q --no-cache libgcc tini curl
+        && apk add -q --no-cache libgcc libstdc++ tini curl
 
 # add meilisearch and meilitool to the `/bin` so you can run it from anywhere
 # and it's easy to find.

From 942d49314c75d00aa5e43df220d5acfef47e6b64 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis.dureuil@xinra.net>
Date: Mon, 18 Dec 2023 22:14:26 +0100
Subject: [PATCH 02/41] Remove dependency that requires libstdc++

---
 Cargo.lock       | 5 -----
 milli/Cargo.toml | 2 +-
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index d57d381ed..edcd26bf9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1592,9 +1592,6 @@ name = "esaxx-rs"
 version = "0.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
-dependencies = [
- "cc",
-]
 
 [[package]]
 name = "fancy-regex"
@@ -5310,11 +5307,9 @@ version = "0.14.1"
 source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.14.1#6357206cdcce4d78ffb1e0372feb456caea09375"
 dependencies = [
  "aho-corasick",
- "clap",
  "derive_builder",
  "esaxx-rs",
  "getrandom",
- "indicatif",
  "itertools 0.11.0",
  "lazy_static",
  "log",
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index b977d64f1..3e10c175a 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -77,7 +77,7 @@ csv = "1.2.1"
 candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
 candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
 candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
-tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1" }
+tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = ["onig"] }
 hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
     "online",
 ] }

From b2193e612ff447fe152b4fc990a48e76516b43d1 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis.dureuil@xinra.net>
Date: Mon, 18 Dec 2023 22:17:29 +0100
Subject: [PATCH 03/41] Revert "Add libstdc++ in Dockerfile" as it is no longer
 needed

This reverts commit 9df8cfc013452ecb5935d5501c96a4c465183a5d.
---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index f6be39785..2cc1dcaec 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,7 +26,7 @@ ENV     MEILI_HTTP_ADDR 0.0.0.0:7700
 ENV     MEILI_SERVER_PROVIDER docker
 
 RUN     apk update --quiet \
-        && apk add -q --no-cache libgcc libstdc++ tini curl
+        && apk add -q --no-cache libgcc tini curl
 
 # add meilisearch and meilitool to the `/bin` so you can run it from anywhere
 # and it's easy to find.

From 1956045a0619c5f80551c6f662823158f0b6974b Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Mon, 27 Nov 2023 14:45:18 +0100
Subject: [PATCH 04/41] add the option

---
 meilisearch/src/analytics/segment_analytics.rs |  1 +
 meilisearch/src/option.rs                      | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs
index 1ad277c28..6a617577b 100644
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -290,6 +290,7 @@ impl From<Opt> for Infos {
             http_addr,
             master_key: _,
             env,
+            task_webhook_url: _,
             max_index_size: _,
             max_task_db_size: _,
             http_payload_size_limit,
diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs
index 1ed20f5b5..37472e174 100644
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@@ -28,6 +28,7 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
 const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
 const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
 const MEILI_ENV: &str = "MEILI_ENV";
+const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
 #[cfg(feature = "analytics")]
 const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
 const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@@ -156,6 +157,10 @@ pub struct Opt {
     #[serde(default = "default_env")]
     pub env: String,
 
+    /// Called whenever a task finishes so a third party can be notified.
+    #[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
+    pub task_webhook_url: Option<String>,
+
     /// Deactivates Meilisearch's built-in telemetry when provided.
     ///
     /// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@@ -375,6 +380,7 @@ impl Opt {
             http_addr,
             master_key,
             env,
+            task_webhook_url,
             max_index_size: _,
             max_task_db_size: _,
             http_payload_size_limit,
@@ -409,6 +415,10 @@ impl Opt {
             export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
         }
         export_to_env_if_not_present(MEILI_ENV, env);
+        if let Some(task_webhook_url) = task_webhook_url {
+            export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url);
+        }
+
         #[cfg(feature = "analytics")]
         {
             export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());

From d78ad51082c4cfc3598a0105655f25154c9e9b60 Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Mon, 27 Nov 2023 15:11:22 +0100
Subject: [PATCH 05/41] Implement the webhook

---
 Cargo.lock                            |  1 +
 index-scheduler/Cargo.toml            |  1 +
 index-scheduler/src/insta_snapshot.rs |  1 +
 index-scheduler/src/lib.rs            | 35 ++++++++++++++++++++++++---
 meilisearch/src/lib.rs                |  1 +
 5 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index d57d381ed..ca7eb715f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2792,6 +2792,7 @@ dependencies = [
  "tempfile",
  "thiserror",
  "time",
+ "ureq",
  "uuid 1.5.0",
 ]
 
diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml
index c4a37b7d6..4d6e4ffd0 100644
--- a/index-scheduler/Cargo.toml
+++ b/index-scheduler/Cargo.toml
@@ -30,6 +30,7 @@ synchronoise = "1.0.1"
 tempfile = "3.5.0"
 thiserror = "1.0.40"
 time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+ureq = "2.9.1"
 uuid = { version = "1.3.1", features = ["serde", "v4"] }
 
 [dev-dependencies]
diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs
index ddb9e934a..9261bf66d 100644
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -37,6 +37,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         snapshots_path: _,
         auth_path: _,
         version_file_path: _,
+        webhook_url: _,
         test_breakpoint_sdr: _,
         planned_failures: _,
         run_loop_iteration: _,
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index b9b360fa4..6756990af 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -170,8 +170,8 @@ impl ProcessingTasks {
     }
 
     /// Set the processing tasks to an empty list
-    fn stop_processing(&mut self) {
-        self.processing = RoaringBitmap::new();
+    fn stop_processing(&mut self) -> RoaringBitmap {
+        std::mem::take(&mut self.processing)
     }
 
     /// Returns `true` if there, at least, is one task that is currently processing that we must stop.
@@ -241,6 +241,7 @@ pub struct IndexSchedulerOptions {
     pub snapshots_path: PathBuf,
     /// The path to the folder containing the dumps.
     pub dumps_path: PathBuf,
+    pub webhook_url: Option<String>,
     /// The maximum size, in bytes, of the task index.
     pub task_db_size: usize,
     /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@@ -323,6 +324,9 @@ pub struct IndexScheduler {
     /// The maximum number of tasks that will be batched together.
     pub(crate) max_number_of_batched_tasks: usize,
 
+    /// The webhook url we should send tasks to after processing every batches.
+    pub(crate) webhook_url: Option<String>,
+
     /// A frame to output the indexation profiling files to disk.
     pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
 
@@ -388,6 +392,7 @@ impl IndexScheduler {
             dumps_path: self.dumps_path.clone(),
             auth_path: self.auth_path.clone(),
             version_file_path: self.version_file_path.clone(),
+            webhook_url: self.webhook_url.clone(),
             currently_updating_index: self.currently_updating_index.clone(),
             embedders: self.embedders.clone(),
             #[cfg(test)]
@@ -487,6 +492,7 @@ impl IndexScheduler {
             snapshots_path: options.snapshots_path,
             auth_path: options.auth_path,
             version_file_path: options.version_file_path,
+            webhook_url: options.webhook_url,
             currently_updating_index: Arc::new(RwLock::new(None)),
             embedders: Default::default(),
 
@@ -1251,19 +1257,41 @@ impl IndexScheduler {
             }
         }
 
-        self.processing_tasks.write().unwrap().stop_processing();
+        let processed = self.processing_tasks.write().unwrap().stop_processing();
 
         #[cfg(test)]
         self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
 
         wtxn.commit().map_err(Error::HeedTransaction)?;
 
+        // We shouldn't crash the tick function if we can't send data to the webhook.
+        let _ = self.notify_webhook(&processed);
+
         #[cfg(test)]
         self.breakpoint(Breakpoint::AfterProcessing);
 
         Ok(TickOutcome::TickAgain(processed_tasks))
     }
 
+    /// Once the tasks changes have been commited we must send all the tasks that were updated to our webhook if there is one.
+    fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
+        if let Some(ref url) = self.webhook_url {
+            let rtxn = self.env.read_txn()?;
+
+            // on average a task takes ~50 bytes
+            let mut buffer = Vec::with_capacity(updated.len() as usize * 50);
+
+            for id in updated {
+                let task = self.get_task(&rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
+                let _ = serde_json::to_writer(&mut buffer, &task);
+            }
+
+            let _ = ureq::post(url).send_bytes(&buffer);
+        }
+
+        Ok(())
+    }
+
     /// Register a task to cleanup the task queue if needed
     fn cleanup_task_queue(&self) -> Result<()> {
         let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@@ -1677,6 +1705,7 @@ mod tests {
                 indexes_path: tempdir.path().join("indexes"),
                 snapshots_path: tempdir.path().join("snapshots"),
                 dumps_path: tempdir.path().join("dumps"),
+                webhook_url: None,
                 task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                 index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                 enable_mdb_writemap: false,
diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs
index e0f488eea..14a1c5b45 100644
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -228,6 +228,7 @@ fn open_or_create_database_unchecked(
             indexes_path: opt.db_path.join("indexes"),
             snapshots_path: opt.snapshot_dir.clone(),
             dumps_path: opt.dump_dir.clone(),
+            webhook_url: opt.task_webhook_url.clone(),
             task_db_size: opt.max_task_db_size.get_bytes() as usize,
             index_base_map_size: opt.max_index_size.get_bytes() as usize,
             enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,

From 391eb72137e451ff4c6252c752b868695e4d74aa Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 28 Nov 2023 11:39:42 +0100
Subject: [PATCH 06/41] start writing a test with actix but it doesn't works

---
 meilisearch/tests/tasks/mod.rs     |   1 +
 meilisearch/tests/tasks/webhook.rs | 101 +++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+)
 create mode 100644 meilisearch/tests/tasks/webhook.rs

diff --git a/meilisearch/tests/tasks/mod.rs b/meilisearch/tests/tasks/mod.rs
index 7a5fa6388..ed387224e 100644
--- a/meilisearch/tests/tasks/mod.rs
+++ b/meilisearch/tests/tasks/mod.rs
@@ -1,4 +1,5 @@
 mod errors;
+mod webhook;
 
 use meili_snap::insta::assert_json_snapshot;
 use time::format_description::well_known::Rfc3339;
diff --git a/meilisearch/tests/tasks/webhook.rs b/meilisearch/tests/tasks/webhook.rs
new file mode 100644
index 000000000..34edb53eb
--- /dev/null
+++ b/meilisearch/tests/tasks/webhook.rs
@@ -0,0 +1,101 @@
+use std::sync::Arc;
+
+use actix_http::body::MessageBody;
+use actix_web::dev::{ServiceFactory, ServiceResponse};
+use actix_web::web::{Bytes, Data};
+use actix_web::{post, App, HttpResponse, HttpServer};
+use meili_snap::snapshot;
+use meilisearch::Opt;
+use tokio::sync::mpsc;
+
+use crate::common::{default_settings, Server};
+use crate::json;
+
+#[post("/")]
+async fn forward_body(sender: Data<mpsc::Sender<Vec<u8>>>, body: Bytes) -> HttpResponse {
+    println!("Received something");
+    let body = body.to_vec();
+    sender.send(body).await.unwrap();
+    HttpResponse::Ok().into()
+}
+
+fn create_app(
+    sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
+) -> actix_web::App<
+    impl ServiceFactory<
+        actix_web::dev::ServiceRequest,
+        Config = (),
+        Response = ServiceResponse<impl MessageBody>,
+        Error = actix_web::Error,
+        InitError = (),
+    >,
+> {
+    App::new().service(forward_body).app_data(Data::from(sender))
+}
+
+struct WebhookHandle {
+    pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
+    pub url: String,
+    pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
+}
+
+async fn create_webhook_server() -> WebhookHandle {
+    let (sender, receiver) = mpsc::unbounded_channel();
+    let sender = Arc::new(sender);
+
+    let server =
+        HttpServer::new(move || create_app(sender.clone())).bind(("localhost", 0)).unwrap();
+    let (ip, scheme) = server.addrs_with_scheme()[0];
+    let url = format!("{scheme}://{ip}/");
+
+    // TODO: Is it cleaned once the test is over
+    let server_handle = tokio::spawn(server.run());
+
+    WebhookHandle { server_handle, url, receiver }
+}
+
+#[actix_web::test]
+async fn test_basic_webhook() {
+    // Request a new server from the pool
+    let mut handle = create_webhook_server().await;
+
+    let db_path = tempfile::tempdir().unwrap();
+    // let (_handle, mut webhook) = create_webhook_server().await;
+    let server = Server::new_with_options(Opt {
+        task_webhook_url: Some(handle.url.clone()),
+        ..default_settings(db_path.path())
+    })
+    .await
+    .unwrap();
+
+    println!("Sending something");
+    reqwest::Client::new().post(&handle.url).body("hello").send().await.unwrap();
+
+    // let (_, status) = server.create_index(json!({ "uid": "tamo" })).await;
+    // snapshot!(status, @"202 Accepted");
+
+    let payload = handle.receiver.recv().await.unwrap();
+    let jsonl = String::from_utf8(payload).unwrap();
+
+    // TODO: kill the server
+    // handle.server_handle.;
+
+    snapshot!(jsonl,
+        @r###"
+    {
+      "uid": 0,
+      "indexUid": null,
+      "status": "succeeded",
+      "type": "dumpCreation",
+      "canceledBy": null,
+      "details": {
+        "dumpUid": "[dumpUid]"
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+}

From fbea72137829e02ad0cc5e1df831ac0e449d4e9b Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 28 Nov 2023 14:47:07 +0100
Subject: [PATCH 07/41] add a first working test with actixweb

---
 index-scheduler/src/lib.rs         |  3 +-
 meilisearch/tests/tasks/webhook.rs | 46 ++++++++++++------------------
 2 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 6756990af..8502da242 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -1286,7 +1286,8 @@ impl IndexScheduler {
                 let _ = serde_json::to_writer(&mut buffer, &task);
             }
 
-            let _ = ureq::post(url).send_bytes(&buffer);
+            println!("Sending request to {url}");
+            let _ = ureq::post(url).send_bytes(&buffer).unwrap();
         }
 
         Ok(())
diff --git a/meilisearch/tests/tasks/webhook.rs b/meilisearch/tests/tasks/webhook.rs
index 34edb53eb..c95d3fc5b 100644
--- a/meilisearch/tests/tasks/webhook.rs
+++ b/meilisearch/tests/tasks/webhook.rs
@@ -12,10 +12,10 @@ use crate::common::{default_settings, Server};
 use crate::json;
 
 #[post("/")]
-async fn forward_body(sender: Data<mpsc::Sender<Vec<u8>>>, body: Bytes) -> HttpResponse {
+async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
     println!("Received something");
     let body = body.to_vec();
-    sender.send(body).await.unwrap();
+    sender.send(body).unwrap();
     HttpResponse::Ok().into()
 }
 
@@ -40,13 +40,18 @@ struct WebhookHandle {
 }
 
 async fn create_webhook_server() -> WebhookHandle {
+    let mut log_builder = env_logger::Builder::new();
+    log_builder.parse_filters("debug");
+    log_builder.init();
+
     let (sender, receiver) = mpsc::unbounded_channel();
     let sender = Arc::new(sender);
 
     let server =
-        HttpServer::new(move || create_app(sender.clone())).bind(("localhost", 0)).unwrap();
+        HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
     let (ip, scheme) = server.addrs_with_scheme()[0];
     let url = format!("{scheme}://{ip}/");
+    println!("url is {url}");
 
     // TODO: Is it cleaned once the test is over
     let server_handle = tokio::spawn(server.run());
@@ -60,7 +65,6 @@ async fn test_basic_webhook() {
     let mut handle = create_webhook_server().await;
 
     let db_path = tempfile::tempdir().unwrap();
-    // let (_handle, mut webhook) = create_webhook_server().await;
     let server = Server::new_with_options(Opt {
         task_webhook_url: Some(handle.url.clone()),
         ..default_settings(db_path.path())
@@ -68,34 +72,20 @@ async fn test_basic_webhook() {
     .await
     .unwrap();
 
-    println!("Sending something");
-    reqwest::Client::new().post(&handle.url).body("hello").send().await.unwrap();
-
-    // let (_, status) = server.create_index(json!({ "uid": "tamo" })).await;
-    // snapshot!(status, @"202 Accepted");
+    let index = server.index("tamo");
+    for i in 0..3 {
+        let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
+    }
 
     let payload = handle.receiver.recv().await.unwrap();
     let jsonl = String::from_utf8(payload).unwrap();
 
-    // TODO: kill the server
-    // handle.server_handle.;
+    snapshot!(jsonl,
+        @r###"{"uid":0,"enqueuedAt":"2023-11-28T13:43:24.754587Z","startedAt":"2023-11-28T13:43:24.756445Z","finishedAt":"2023-11-28T13:43:24.791527Z","error":null,"canceledBy":null,"details":{"DocumentAdditionOrUpdate":{"received_documents":1,"indexed_documents":1}},"status":"succeeded","kind":{"documentAdditionOrUpdate":{"index_uid":"tamo","primary_key":null,"method":"ReplaceDocuments","content_file":"ca77ac82-4504-4c85-81a5-1a8d68f1a386","documents_count":1,"allow_index_creation":true}}}"###);
+
+    let payload = handle.receiver.recv().await.unwrap();
+    let jsonl = String::from_utf8(payload).unwrap();
 
     snapshot!(jsonl,
-        @r###"
-    {
-      "uid": 0,
-      "indexUid": null,
-      "status": "succeeded",
-      "type": "dumpCreation",
-      "canceledBy": null,
-      "details": {
-        "dumpUid": "[dumpUid]"
-      },
-      "error": null,
-      "duration": "[duration]",
-      "enqueuedAt": "[date]",
-      "startedAt": "[date]",
-      "finishedAt": "[date]"
-    }
-    "###);
+        @r###"{"uid":1,"enqueuedAt":"2023-11-28T13:43:24.761498Z","startedAt":"2023-11-28T13:43:24.793989Z","finishedAt":"2023-11-28T13:43:24.814623Z","error":null,"canceledBy":null,"details":{"DocumentAdditionOrUpdate":{"received_documents":1,"indexed_documents":1}},"status":"succeeded","kind":{"documentAdditionOrUpdate":{"index_uid":"tamo","primary_key":null,"method":"ReplaceDocuments","content_file":"c947aefa-7f98-433d-8ce4-5926d8d2ce10","documents_count":1,"allow_index_creation":true}}}{"uid":2,"enqueuedAt":"2023-11-28T13:43:24.76776Z","startedAt":"2023-11-28T13:43:24.793989Z","finishedAt":"2023-11-28T13:43:24.814623Z","error":null,"canceledBy":null,"details":{"DocumentAdditionOrUpdate":{"received_documents":1,"indexed_documents":1}},"status":"succeeded","kind":{"documentAdditionOrUpdate":{"index_uid":"tamo","primary_key":null,"method":"ReplaceDocuments","content_file":"a21d6da6-9322-4827-8c08-f33d2e1b6cae","documents_count":1,"allow_index_creation":true}}}"###);
 }

From 3adbc2b942e7bb0808720f46838aab2acef26e00 Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 28 Nov 2023 15:08:13 +0100
Subject: [PATCH 08/41] return a task view instead of a task

---
 index-scheduler/src/lib.rs         |   4 +-
 meilisearch-types/src/lib.rs       |   1 +
 meilisearch-types/src/task_view.rs | 139 ++++++++++++++++++++++++++++
 meilisearch/src/routes/tasks.rs    | 140 +----------------------------
 meilisearch/tests/tasks/webhook.rs |  10 ++-
 5 files changed, 153 insertions(+), 141 deletions(-)
 create mode 100644 meilisearch-types/src/task_view.rs

diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 8502da242..d96b6c2af 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -54,6 +54,7 @@ use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
+use meilisearch_types::task_view::TaskView;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
 use puffin::FrameView;
 use roaring::RoaringBitmap;
@@ -1283,7 +1284,8 @@ impl IndexScheduler {
 
             for id in updated {
                 let task = self.get_task(&rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
-                let _ = serde_json::to_writer(&mut buffer, &task);
+                let _ = serde_json::to_writer(&mut buffer, &TaskView::from_task(&task));
+                buffer.push(b'\n');
             }
 
             println!("Sending request to {url}");
diff --git a/meilisearch-types/src/lib.rs b/meilisearch-types/src/lib.rs
index b0762563a..e4f5cbeb4 100644
--- a/meilisearch-types/src/lib.rs
+++ b/meilisearch-types/src/lib.rs
@@ -9,6 +9,7 @@ pub mod index_uid_pattern;
 pub mod keys;
 pub mod settings;
 pub mod star_or;
+pub mod task_view;
 pub mod tasks;
 pub mod versioning;
 pub use milli::{heed, Index};
diff --git a/meilisearch-types/src/task_view.rs b/meilisearch-types/src/task_view.rs
new file mode 100644
index 000000000..02be91a88
--- /dev/null
+++ b/meilisearch-types/src/task_view.rs
@@ -0,0 +1,139 @@
+use serde::Serialize;
+use time::{Duration, OffsetDateTime};
+
+use crate::error::ResponseError;
+use crate::settings::{Settings, Unchecked};
+use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TaskView {
+    pub uid: TaskId,
+    #[serde(default)]
+    pub index_uid: Option<String>,
+    pub status: Status,
+    #[serde(rename = "type")]
+    pub kind: Kind,
+    pub canceled_by: Option<TaskId>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub details: Option<DetailsView>,
+    pub error: Option<ResponseError>,
+    #[serde(serialize_with = "serialize_duration", default)]
+    pub duration: Option<Duration>,
+    #[serde(with = "time::serde::rfc3339")]
+    pub enqueued_at: OffsetDateTime,
+    #[serde(with = "time::serde::rfc3339::option", default)]
+    pub started_at: Option<OffsetDateTime>,
+    #[serde(with = "time::serde::rfc3339::option", default)]
+    pub finished_at: Option<OffsetDateTime>,
+}
+
+impl TaskView {
+    pub fn from_task(task: &Task) -> TaskView {
+        TaskView {
+            uid: task.uid,
+            index_uid: task.index_uid().map(ToOwned::to_owned),
+            status: task.status,
+            kind: task.kind.as_kind(),
+            canceled_by: task.canceled_by,
+            details: task.details.clone().map(DetailsView::from),
+            error: task.error.clone(),
+            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
+            enqueued_at: task.enqueued_at,
+            started_at: task.started_at,
+            finished_at: task.finished_at,
+        }
+    }
+}
+
+#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct DetailsView {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub received_documents: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub indexed_documents: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub primary_key: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub provided_ids: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub deleted_documents: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub matched_tasks: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub canceled_tasks: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub deleted_tasks: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub original_filter: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub dump_uid: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub settings: Option<Box<Settings<Unchecked>>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub swaps: Option<Vec<IndexSwap>>,
+}
+
+impl From<Details> for DetailsView {
+    fn from(details: Details) -> Self {
+        match details {
+            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
+                DetailsView {
+                    received_documents: Some(received_documents),
+                    indexed_documents: Some(indexed_documents),
+                    ..DetailsView::default()
+                }
+            }
+            Details::SettingsUpdate { settings } => {
+                DetailsView { settings: Some(settings), ..DetailsView::default() }
+            }
+            Details::IndexInfo { primary_key } => {
+                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
+            }
+            Details::DocumentDeletion {
+                provided_ids: received_document_ids,
+                deleted_documents,
+            } => DetailsView {
+                provided_ids: Some(received_document_ids),
+                deleted_documents: Some(deleted_documents),
+                original_filter: Some(None),
+                ..DetailsView::default()
+            },
+            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
+                DetailsView {
+                    provided_ids: Some(0),
+                    original_filter: Some(Some(original_filter)),
+                    deleted_documents: Some(deleted_documents),
+                    ..DetailsView::default()
+                }
+            }
+            Details::ClearAll { deleted_documents } => {
+                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
+            }
+            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
+                DetailsView {
+                    matched_tasks: Some(matched_tasks),
+                    canceled_tasks: Some(canceled_tasks),
+                    original_filter: Some(Some(original_filter)),
+                    ..DetailsView::default()
+                }
+            }
+            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
+                DetailsView {
+                    matched_tasks: Some(matched_tasks),
+                    deleted_tasks: Some(deleted_tasks),
+                    original_filter: Some(Some(original_filter)),
+                    ..DetailsView::default()
+                }
+            }
+            Details::Dump { dump_uid } => {
+                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
+            }
+            Details::IndexSwap { swaps } => {
+                DetailsView { swaps: Some(swaps), ..Default::default() }
+            }
+        }
+    }
+}
diff --git a/meilisearch/src/routes/tasks.rs b/meilisearch/src/routes/tasks.rs
index f7d4c44d7..03b63001d 100644
--- a/meilisearch/src/routes/tasks.rs
+++ b/meilisearch/src/routes/tasks.rs
@@ -8,11 +8,9 @@ use meilisearch_types::deserr::DeserrQueryParamError;
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
 use meilisearch_types::index_uid::IndexUid;
-use meilisearch_types::settings::{Settings, Unchecked};
 use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
-use meilisearch_types::tasks::{
-    serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
-};
+use meilisearch_types::task_view::TaskView;
+use meilisearch_types::tasks::{Kind, KindWithContent, Status};
 use serde::Serialize;
 use serde_json::json;
 use time::format_description::well_known::Rfc3339;
@@ -37,140 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
     .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
     .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
 }
-
-#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct TaskView {
-    pub uid: TaskId,
-    #[serde(default)]
-    pub index_uid: Option<String>,
-    pub status: Status,
-    #[serde(rename = "type")]
-    pub kind: Kind,
-    pub canceled_by: Option<TaskId>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub details: Option<DetailsView>,
-    pub error: Option<ResponseError>,
-    #[serde(serialize_with = "serialize_duration", default)]
-    pub duration: Option<Duration>,
-    #[serde(with = "time::serde::rfc3339")]
-    pub enqueued_at: OffsetDateTime,
-    #[serde(with = "time::serde::rfc3339::option", default)]
-    pub started_at: Option<OffsetDateTime>,
-    #[serde(with = "time::serde::rfc3339::option", default)]
-    pub finished_at: Option<OffsetDateTime>,
-}
-
-impl TaskView {
-    pub fn from_task(task: &Task) -> TaskView {
-        TaskView {
-            uid: task.uid,
-            index_uid: task.index_uid().map(ToOwned::to_owned),
-            status: task.status,
-            kind: task.kind.as_kind(),
-            canceled_by: task.canceled_by,
-            details: task.details.clone().map(DetailsView::from),
-            error: task.error.clone(),
-            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
-            enqueued_at: task.enqueued_at,
-            started_at: task.started_at,
-            finished_at: task.finished_at,
-        }
-    }
-}
-
-#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct DetailsView {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub received_documents: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub indexed_documents: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub primary_key: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub provided_ids: Option<usize>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub deleted_documents: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub matched_tasks: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub canceled_tasks: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub deleted_tasks: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub original_filter: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub dump_uid: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(flatten)]
-    pub settings: Option<Box<Settings<Unchecked>>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub swaps: Option<Vec<IndexSwap>>,
-}
-
-impl From<Details> for DetailsView {
-    fn from(details: Details) -> Self {
-        match details {
-            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
-                DetailsView {
-                    received_documents: Some(received_documents),
-                    indexed_documents: Some(indexed_documents),
-                    ..DetailsView::default()
-                }
-            }
-            Details::SettingsUpdate { settings } => {
-                DetailsView { settings: Some(settings), ..DetailsView::default() }
-            }
-            Details::IndexInfo { primary_key } => {
-                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
-            }
-            Details::DocumentDeletion {
-                provided_ids: received_document_ids,
-                deleted_documents,
-            } => DetailsView {
-                provided_ids: Some(received_document_ids),
-                deleted_documents: Some(deleted_documents),
-                original_filter: Some(None),
-                ..DetailsView::default()
-            },
-            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
-                DetailsView {
-                    provided_ids: Some(0),
-                    original_filter: Some(Some(original_filter)),
-                    deleted_documents: Some(deleted_documents),
-                    ..DetailsView::default()
-                }
-            }
-            Details::ClearAll { deleted_documents } => {
-                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
-            }
-            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
-                DetailsView {
-                    matched_tasks: Some(matched_tasks),
-                    canceled_tasks: Some(canceled_tasks),
-                    original_filter: Some(Some(original_filter)),
-                    ..DetailsView::default()
-                }
-            }
-            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
-                DetailsView {
-                    matched_tasks: Some(matched_tasks),
-                    deleted_tasks: Some(deleted_tasks),
-                    original_filter: Some(Some(original_filter)),
-                    ..DetailsView::default()
-                }
-            }
-            Details::Dump { dump_uid } => {
-                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
-            }
-            Details::IndexSwap { swaps } => {
-                DetailsView { swaps: Some(swaps), ..Default::default() }
-            }
-        }
-    }
-}
-
 #[derive(Debug, Deserr)]
 #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
 pub struct TasksFilterQuery {
diff --git a/meilisearch/tests/tasks/webhook.rs b/meilisearch/tests/tasks/webhook.rs
index c95d3fc5b..613b4ff3f 100644
--- a/meilisearch/tests/tasks/webhook.rs
+++ b/meilisearch/tests/tasks/webhook.rs
@@ -73,6 +73,7 @@ async fn test_basic_webhook() {
     .unwrap();
 
     let index = server.index("tamo");
+    // TODO: may be flaky, we're relying on the fact that during the time the first document addition succeed, the two other operations will be received.
     for i in 0..3 {
         let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
     }
@@ -81,11 +82,16 @@ async fn test_basic_webhook() {
     let jsonl = String::from_utf8(payload).unwrap();
 
     snapshot!(jsonl,
-        @r###"{"uid":0,"enqueuedAt":"2023-11-28T13:43:24.754587Z","startedAt":"2023-11-28T13:43:24.756445Z","finishedAt":"2023-11-28T13:43:24.791527Z","error":null,"canceledBy":null,"details":{"DocumentAdditionOrUpdate":{"received_documents":1,"indexed_documents":1}},"status":"succeeded","kind":{"documentAdditionOrUpdate":{"index_uid":"tamo","primary_key":null,"method":"ReplaceDocuments","content_file":"ca77ac82-4504-4c85-81a5-1a8d68f1a386","documents_count":1,"allow_index_creation":true}}}"###);
+        @r###"
+    {"uid":0,"indexUid":"tamo","status":"succeeded","type":"documentAdditionOrUpdate","canceledBy":null,"details":{"receivedDocuments":1,"indexedDocuments":1},"error":null,"duration":"PT0.027444S","enqueuedAt":"2023-11-28T14:05:37.767678Z","startedAt":"2023-11-28T14:05:37.769519Z","finishedAt":"2023-11-28T14:05:37.796963Z"}
+    "###);
 
     let payload = handle.receiver.recv().await.unwrap();
     let jsonl = String::from_utf8(payload).unwrap();
 
     snapshot!(jsonl,
-        @r###"{"uid":1,"enqueuedAt":"2023-11-28T13:43:24.761498Z","startedAt":"2023-11-28T13:43:24.793989Z","finishedAt":"2023-11-28T13:43:24.814623Z","error":null,"canceledBy":null,"details":{"DocumentAdditionOrUpdate":{"received_documents":1,"indexed_documents":1}},"status":"succeeded","kind":{"documentAdditionOrUpdate":{"index_uid":"tamo","primary_key":null,"method":"ReplaceDocuments","content_file":"c947aefa-7f98-433d-8ce4-5926d8d2ce10","documents_count":1,"allow_index_creation":true}}}{"uid":2,"enqueuedAt":"2023-11-28T13:43:24.76776Z","startedAt":"2023-11-28T13:43:24.793989Z","finishedAt":"2023-11-28T13:43:24.814623Z","error":null,"canceledBy":null,"details":{"DocumentAdditionOrUpdate":{"received_documents":1,"indexed_documents":1}},"status":"succeeded","kind":{"documentAdditionOrUpdate":{"index_uid":"tamo","primary_key":null,"method":"ReplaceDocuments","content_file":"a21d6da6-9322-4827-8c08-f33d2e1b6cae","documents_count":1,"allow_index_creation":true}}}"###);
+        @r###"
+    {"uid":1,"indexUid":"tamo","status":"succeeded","type":"documentAdditionOrUpdate","canceledBy":null,"details":{"receivedDocuments":1,"indexedDocuments":1},"error":null,"duration":"PT0.020221S","enqueuedAt":"2023-11-28T14:05:37.773731Z","startedAt":"2023-11-28T14:05:37.799448Z","finishedAt":"2023-11-28T14:05:37.819669Z"}
+    {"uid":2,"indexUid":"tamo","status":"succeeded","type":"documentAdditionOrUpdate","canceledBy":null,"details":{"receivedDocuments":1,"indexedDocuments":1},"error":null,"duration":"PT0.020221S","enqueuedAt":"2023-11-28T14:05:37.780466Z","startedAt":"2023-11-28T14:05:37.799448Z","finishedAt":"2023-11-28T14:05:37.819669Z"}
+    "###);
 }

From 0b2fff27f231c3eb37c9340a6879135c70a227d2 Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 28 Nov 2023 15:55:48 +0100
Subject: [PATCH 09/41] update and fix the test

---
 index-scheduler/src/lib.rs         |  1 -
 meilisearch/tests/tasks/webhook.rs | 73 ++++++++++++++++++++----------
 2 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index d96b6c2af..5d44ed104 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -1288,7 +1288,6 @@ impl IndexScheduler {
                 buffer.push(b'\n');
             }
 
-            println!("Sending request to {url}");
             let _ = ureq::post(url).send_bytes(&buffer).unwrap();
         }
 
diff --git a/meilisearch/tests/tasks/webhook.rs b/meilisearch/tests/tasks/webhook.rs
index 613b4ff3f..e852839ec 100644
--- a/meilisearch/tests/tasks/webhook.rs
+++ b/meilisearch/tests/tasks/webhook.rs
@@ -1,10 +1,14 @@
+//! To test the webhook, we need to spawn a new server with a URL listening for
+//! post requests. The webhook handle starts a server and forwards all the
+//! received requests into a channel for you to handle.
+
 use std::sync::Arc;
 
 use actix_http::body::MessageBody;
 use actix_web::dev::{ServiceFactory, ServiceResponse};
 use actix_web::web::{Bytes, Data};
 use actix_web::{post, App, HttpResponse, HttpServer};
-use meili_snap::snapshot;
+use meili_snap::{json_string, snapshot};
 use meilisearch::Opt;
 use tokio::sync::mpsc;
 
@@ -13,7 +17,6 @@ use crate::json;
 
 #[post("/")]
 async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
-    println!("Received something");
     let body = body.to_vec();
     sender.send(body).unwrap();
     HttpResponse::Ok().into()
@@ -47,51 +50,73 @@ async fn create_webhook_server() -> WebhookHandle {
     let (sender, receiver) = mpsc::unbounded_channel();
     let sender = Arc::new(sender);
 
+    // By listening on the port 0, the system will give us any available port.
     let server =
         HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
     let (ip, scheme) = server.addrs_with_scheme()[0];
     let url = format!("{scheme}://{ip}/");
-    println!("url is {url}");
 
-    // TODO: Is it cleaned once the test is over
     let server_handle = tokio::spawn(server.run());
-
     WebhookHandle { server_handle, url, receiver }
 }
 
 #[actix_web::test]
 async fn test_basic_webhook() {
-    // Request a new server from the pool
-    let mut handle = create_webhook_server().await;
+    let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
 
     let db_path = tempfile::tempdir().unwrap();
     let server = Server::new_with_options(Opt {
-        task_webhook_url: Some(handle.url.clone()),
+        task_webhook_url: Some(url),
         ..default_settings(db_path.path())
     })
     .await
     .unwrap();
 
     let index = server.index("tamo");
-    // TODO: may be flaky, we're relying on the fact that during the time the first document addition succeed, the two other operations will be received.
-    for i in 0..3 {
+    // May be flaky: we're relying on the fact that while the first document addition is processed, the other
+    // operations will be received and will be batched together. If it doesn't happen it's not a problem
+    // the rest of the test won't assume anything about the number of tasks per batch.
+    for i in 0..5 {
         let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
     }
 
-    let payload = handle.receiver.recv().await.unwrap();
-    let jsonl = String::from_utf8(payload).unwrap();
+    let mut nb_tasks = 0;
+    while let Some(payload) = receiver.recv().await {
+        let payload = String::from_utf8(payload).unwrap();
+        let jsonl = payload.split('\n');
+        for json in jsonl {
+            if json.is_empty() {
+                break; // we reached EOF
+            }
+            nb_tasks += 1;
+            let json: serde_json::Value = serde_json::from_str(json).unwrap();
+            snapshot!(
+                json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
+            @r###"
+            {
+              "uid": "[uid]",
+              "indexUid": "tamo",
+              "status": "succeeded",
+              "type": "documentAdditionOrUpdate",
+              "canceledBy": null,
+              "details": {
+                "receivedDocuments": 1,
+                "indexedDocuments": 1
+              },
+              "error": null,
+              "duration": "[duration]",
+              "enqueuedAt": "[date]",
+              "startedAt": "[date]",
+              "finishedAt": "[date]"
+            }
+            "###);
+        }
+        if nb_tasks == 5 {
+            break;
+        }
+    }
 
-    snapshot!(jsonl,
-        @r###"
-    {"uid":0,"indexUid":"tamo","status":"succeeded","type":"documentAdditionOrUpdate","canceledBy":null,"details":{"receivedDocuments":1,"indexedDocuments":1},"error":null,"duration":"PT0.027444S","enqueuedAt":"2023-11-28T14:05:37.767678Z","startedAt":"2023-11-28T14:05:37.769519Z","finishedAt":"2023-11-28T14:05:37.796963Z"}
-    "###);
+    assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
 
-    let payload = handle.receiver.recv().await.unwrap();
-    let jsonl = String::from_utf8(payload).unwrap();
-
-    snapshot!(jsonl,
-        @r###"
-    {"uid":1,"indexUid":"tamo","status":"succeeded","type":"documentAdditionOrUpdate","canceledBy":null,"details":{"receivedDocuments":1,"indexedDocuments":1},"error":null,"duration":"PT0.020221S","enqueuedAt":"2023-11-28T14:05:37.773731Z","startedAt":"2023-11-28T14:05:37.799448Z","finishedAt":"2023-11-28T14:05:37.819669Z"}
-    {"uid":2,"indexUid":"tamo","status":"succeeded","type":"documentAdditionOrUpdate","canceledBy":null,"details":{"receivedDocuments":1,"indexedDocuments":1},"error":null,"duration":"PT0.020221S","enqueuedAt":"2023-11-28T14:05:37.780466Z","startedAt":"2023-11-28T14:05:37.799448Z","finishedAt":"2023-11-28T14:05:37.819669Z"}
-    "###);
+    server_handle.abort();
 }

From 547379abb018af63e21a550384d19cf2c1fbb033 Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 28 Nov 2023 16:28:11 +0100
Subject: [PATCH 10/41] parse the url correctly

---
 Cargo.lock                | 18 ++++++++++--------
 meilisearch/Cargo.toml    |  1 +
 meilisearch/src/lib.rs    |  2 +-
 meilisearch/src/option.rs |  9 +++++++--
 4 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index ca7eb715f..ab566e836 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1704,9 +1704,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
 
 [[package]]
 name = "form_urlencoded"
-version = "1.2.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
+checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456"
 dependencies = [
  "percent-encoding",
 ]
@@ -2756,9 +2756,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
 
 [[package]]
 name = "idna"
-version = "0.4.0"
+version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
+checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6"
 dependencies = [
  "unicode-bidi",
  "unicode-normalization",
@@ -3563,6 +3563,7 @@ dependencies = [
  "tokio",
  "tokio-stream",
  "toml",
+ "url",
  "urlencoding",
  "uuid 1.5.0",
  "vergen",
@@ -4071,9 +4072,9 @@ dependencies = [
 
 [[package]]
 name = "percent-encoding"
-version = "2.3.0"
+version = "2.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
+checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
 
 [[package]]
 name = "permissive-json-pointer"
@@ -5603,13 +5604,14 @@ dependencies = [
 
 [[package]]
 name = "url"
-version = "2.4.0"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50bff7831e19200a85b17131d085c25d7811bc4e186efdaf54bbd132994a88cb"
+checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633"
 dependencies = [
  "form_urlencoded",
  "idna",
  "percent-encoding",
+ "serde",
 ]
 
 [[package]]
diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml
index c59b38fa6..434a488f5 100644
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -104,6 +104,7 @@ walkdir = "2.3.3"
 yaup = "0.2.1"
 serde_urlencoded = "0.7.1"
 termcolor = "1.2.0"
+url = { version = "2.5.0", features = ["serde"] }
 
 [dev-dependencies]
 actix-rt = "2.8.0"
diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs
index 14a1c5b45..3698e5da4 100644
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -228,7 +228,7 @@ fn open_or_create_database_unchecked(
             indexes_path: opt.db_path.join("indexes"),
             snapshots_path: opt.snapshot_dir.clone(),
             dumps_path: opt.dump_dir.clone(),
-            webhook_url: opt.task_webhook_url.clone(),
+            webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
             task_db_size: opt.max_task_db_size.get_bytes() as usize,
             index_base_map_size: opt.max_index_size.get_bytes() as usize,
             enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs
index 37472e174..e1f16d888 100644
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@@ -21,6 +21,7 @@ use rustls::RootCertStore;
 use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
 use serde::{Deserialize, Serialize};
 use sysinfo::{RefreshKind, System, SystemExt};
+use url::Url;
 
 const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
 
@@ -69,6 +70,10 @@ const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
 const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
 const DEFAULT_LOG_EVERY_N: usize = 100_000;
 
+fn parse_url(s: &str) -> Result<Url, url::ParseError> {
+    Url::parse(s)
+}
+
 // Each environment (index and task-db) is taking space in the virtual address space.
 // Ideally, indexes can occupy 2TiB each to avoid having to manually resize them.
 // The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be
@@ -159,7 +164,7 @@ pub struct Opt {
 
     /// Called whenever a task finishes so a third party can be notified.
     #[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
-    pub task_webhook_url: Option<String>,
+    pub task_webhook_url: Option<Url>,
 
     /// Deactivates Meilisearch's built-in telemetry when provided.
     ///
@@ -416,7 +421,7 @@ impl Opt {
         }
         export_to_env_if_not_present(MEILI_ENV, env);
         if let Some(task_webhook_url) = task_webhook_url {
-            export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url);
+            export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
         }
 
         #[cfg(feature = "analytics")]

From be72326c0a656841e3e5d051284f8997abe2b065 Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Wed, 29 Nov 2023 13:09:04 +0100
Subject: [PATCH 11/41] gzip the tasks

---
 Cargo.lock                         | 5 +++--
 index-scheduler/Cargo.toml         | 1 +
 index-scheduler/src/lib.rs         | 7 ++++++-
 meilisearch/tests/tasks/webhook.rs | 3 ++-
 4 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index ab566e836..e632d8621 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1680,9 +1680,9 @@ dependencies = [
 
 [[package]]
 name = "flate2"
-version = "1.0.26"
+version = "1.0.28"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
+checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e"
 dependencies = [
  "crc32fast",
  "miniz_oxide",
@@ -2777,6 +2777,7 @@ dependencies = [
  "dump",
  "enum-iterator",
  "file-store",
+ "flate2",
  "insta",
  "log",
  "meili-snap",
diff --git a/index-scheduler/Cargo.toml b/index-scheduler/Cargo.toml
index 4d6e4ffd0..a8c19f435 100644
--- a/index-scheduler/Cargo.toml
+++ b/index-scheduler/Cargo.toml
@@ -18,6 +18,7 @@ derive_builder = "0.12.0"
 dump = { path = "../dump" }
 enum-iterator = "1.4.0"
 file-store = { path = "../file-store" }
+flate2 = "1.0.28"
 log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 5d44ed104..bfaca3126 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -45,6 +45,8 @@ use dump::{KindDump, TaskDump, UpdateFile};
 pub use error::Error;
 pub use features::RoFeatures;
 use file_store::FileStore;
+use flate2::bufread::GzEncoder;
+use flate2::Compression;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
 use meilisearch_types::heed::byteorder::BE;
@@ -1288,7 +1290,10 @@ impl IndexScheduler {
                 buffer.push(b'\n');
             }
 
-            let _ = ureq::post(url).send_bytes(&buffer).unwrap();
+            let reader = GzEncoder::new(&buffer[..], Compression::default());
+            if let Err(e) = ureq::post(url).set("Content-Encoding", "gzip").send(reader) {
+                log::error!("While sending data to the webhook: {e}");
+            }
         }
 
         Ok(())
diff --git a/meilisearch/tests/tasks/webhook.rs b/meilisearch/tests/tasks/webhook.rs
index e852839ec..688d35e8b 100644
--- a/meilisearch/tests/tasks/webhook.rs
+++ b/meilisearch/tests/tasks/webhook.rs
@@ -11,6 +11,7 @@ use actix_web::{post, App, HttpResponse, HttpServer};
 use meili_snap::{json_string, snapshot};
 use meilisearch::Opt;
 use tokio::sync::mpsc;
+use url::Url;
 
 use crate::common::{default_settings, Server};
 use crate::json;
@@ -66,7 +67,7 @@ async fn test_basic_webhook() {
 
     let db_path = tempfile::tempdir().unwrap();
     let server = Server::new_with_options(Opt {
-        task_webhook_url: Some(url),
+        task_webhook_url: Some(Url::parse(&url).unwrap()),
         ..default_settings(db_path.path())
     })
     .await

From c83a33017e5224b4f7dfa713201b80b1070b69bc Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Wed, 29 Nov 2023 14:27:50 +0100
Subject: [PATCH 12/41] stream and chunk the data

---
 index-scheduler/src/lib.rs         | 63 +++++++++++++++++++++++++-----
 meilisearch/tests/tasks/webhook.rs |  2 +-
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index bfaca3126..b5b061a50 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -34,6 +34,7 @@ pub type TaskId = u32;
 
 use std::collections::{BTreeMap, HashMap};
 use std::fs::File;
+use std::io::{self, BufReader, Read};
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
@@ -1279,18 +1280,60 @@ impl IndexScheduler {
     /// Once the tasks changes have been commited we must send all the tasks that were updated to our webhook if there is one.
     fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
         if let Some(ref url) = self.webhook_url {
-            let rtxn = self.env.read_txn()?;
-
-            // on average a task takes ~50 bytes
-            let mut buffer = Vec::with_capacity(updated.len() as usize * 50);
-
-            for id in updated {
-                let task = self.get_task(&rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
-                let _ = serde_json::to_writer(&mut buffer, &TaskView::from_task(&task));
-                buffer.push(b'\n');
+            struct TaskReader<'a, 'b> {
+                rtxn: &'a RoTxn<'a>,
+                index_scheduler: &'a IndexScheduler,
+                tasks: &'b mut roaring::bitmap::Iter<'b>,
+                buffer: Vec<u8>,
+                written: usize,
             }
 
-            let reader = GzEncoder::new(&buffer[..], Compression::default());
+            impl<'a, 'b> Read for TaskReader<'a, 'b> {
+                fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
+                    if self.buffer.is_empty() {
+                        match self.tasks.next() {
+                            None => return Ok(0),
+                            Some(task_id) => {
+                                let task = self
+                                    .index_scheduler
+                                    .get_task(self.rtxn, task_id)
+                                    .map_err(io::Error::other)?
+                                    .ok_or_else(|| io::Error::other(Error::CorruptedTaskQueue))?;
+
+                                serde_json::to_writer(
+                                    &mut self.buffer,
+                                    &TaskView::from_task(&task),
+                                )?;
+                                self.buffer.push(b'\n');
+                            }
+                        }
+                    }
+
+                    let mut to_write = &self.buffer[self.written..];
+                    let wrote = io::copy(&mut to_write, &mut buf)?;
+                    self.written += wrote as usize;
+
+                    // we wrote everything and must refresh our buffer on the next call
+                    if self.written == self.buffer.len() {
+                        self.written = 0;
+                        self.buffer.clear();
+                    }
+
+                    Ok(wrote as usize)
+                }
+            }
+
+            let rtxn = self.env.read_txn()?;
+
+            let task_reader = TaskReader {
+                rtxn: &rtxn,
+                index_scheduler: self,
+                tasks: &mut updated.into_iter(),
+                buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
+                written: 0,
+            };
+
+            let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
             if let Err(e) = ureq::post(url).set("Content-Encoding", "gzip").send(reader) {
                 log::error!("While sending data to the webhook: {e}");
             }
diff --git a/meilisearch/tests/tasks/webhook.rs b/meilisearch/tests/tasks/webhook.rs
index 688d35e8b..6979ff294 100644
--- a/meilisearch/tests/tasks/webhook.rs
+++ b/meilisearch/tests/tasks/webhook.rs
@@ -45,7 +45,7 @@ struct WebhookHandle {
 
 async fn create_webhook_server() -> WebhookHandle {
     let mut log_builder = env_logger::Builder::new();
-    log_builder.parse_filters("debug");
+    log_builder.parse_filters("info");
     log_builder.init();
 
     let (sender, receiver) = mpsc::unbounded_channel();

From 4fb25b878240ab0bc5034c6b25a6cc282884652a Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Wed, 29 Nov 2023 14:51:47 +0100
Subject: [PATCH 13/41] fix clippy

---
 index-scheduler/src/lib.rs | 9 +++++++--
 meilisearch/src/option.rs  | 4 ----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index b5b061a50..2ad263ca4 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -1297,8 +1297,13 @@ impl IndexScheduler {
                                 let task = self
                                     .index_scheduler
                                     .get_task(self.rtxn, task_id)
-                                    .map_err(io::Error::other)?
-                                    .ok_or_else(|| io::Error::other(Error::CorruptedTaskQueue))?;
+                                    .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
+                                    .ok_or_else(|| {
+                                        io::Error::new(
+                                            io::ErrorKind::Other,
+                                            Error::CorruptedTaskQueue,
+                                        )
+                                    })?;
 
                                 serde_json::to_writer(
                                     &mut self.buffer,
diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs
index e1f16d888..abb2bab6c 100644
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@@ -70,10 +70,6 @@ const MEILI_MAX_INDEXING_MEMORY: &str = "MEILI_MAX_INDEXING_MEMORY";
 const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
 const DEFAULT_LOG_EVERY_N: usize = 100_000;
 
-fn parse_url(s: &str) -> Result<Url, url::ParseError> {
-    Url::parse(s)
-}
-
 // Each environment (index and task-db) is taking space in the virtual address space.
 // Ideally, indexes can occupy 2TiB each to avoid having to manually resize them.
 // The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be

From 19736cefe858377c4c6143daa05479361b87ad8e Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Mon, 4 Dec 2023 10:38:01 +0100
Subject: [PATCH 14/41] add the analytics

---
 meilisearch/src/analytics/segment_analytics.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs
index 6a617577b..0cdb18540 100644
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -264,6 +264,7 @@ struct Infos {
     ignore_snapshot_if_db_exists: bool,
     http_addr: bool,
     http_payload_size_limit: Byte,
+    task_queue_webhook: bool,
     log_level: String,
     max_indexing_memory: MaxMemory,
     max_indexing_threads: MaxThreads,
@@ -290,7 +291,7 @@ impl From<Opt> for Infos {
             http_addr,
             master_key: _,
             env,
-            task_webhook_url: _,
+            task_webhook_url,
             max_index_size: _,
             max_task_db_size: _,
             http_payload_size_limit,
@@ -344,6 +345,7 @@ impl From<Opt> for Infos {
             http_addr: http_addr != default_http_addr(),
             http_payload_size_limit,
             experimental_max_number_of_batched_tasks,
+            task_queue_webhook: task_webhook_url.is_some(),
             log_level: log_level.to_string(),
             max_indexing_memory,
             max_indexing_threads,

From fa2b96b9a5e80fc5ba18a5d78d30113ca0ee6d37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Tue, 19 Dec 2023 12:18:45 +0100
Subject: [PATCH 15/41] Add an Authorization Header along with the webhook
 calls

---
 index-scheduler/src/insta_snapshot.rs          |  1 +
 index-scheduler/src/lib.rs                     | 17 ++++++++++++++++-
 meilisearch/src/analytics/segment_analytics.rs |  3 +++
 meilisearch/src/lib.rs                         |  1 +
 meilisearch/src/option.rs                      | 12 ++++++++++++
 5 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs
index 9261bf66d..0adda43ff 100644
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -38,6 +38,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         auth_path: _,
         version_file_path: _,
         webhook_url: _,
+        webhook_authorization_header: _,
         test_breakpoint_sdr: _,
         planned_failures: _,
         run_loop_iteration: _,
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 2ad263ca4..296f8add1 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -245,7 +245,10 @@ pub struct IndexSchedulerOptions {
     pub snapshots_path: PathBuf,
     /// The path to the folder containing the dumps.
     pub dumps_path: PathBuf,
+    /// The URL on which we must send the tasks statuses
     pub webhook_url: Option<String>,
+    /// The value we will send into the Authorization HTTP header on the webhook URL
+    pub webhook_authorization_header: Option<String>,
     /// The maximum size, in bytes, of the task index.
     pub task_db_size: usize,
     /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@@ -330,6 +333,8 @@ pub struct IndexScheduler {
 
     /// The webhook url we should send tasks to after processing every batches.
     pub(crate) webhook_url: Option<String>,
+    /// The Authorization header to send to the webhook URL.
+    pub(crate) webhook_authorization_header: Option<String>,
 
     /// A frame to output the indexation profiling files to disk.
     pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
@@ -397,6 +402,7 @@ impl IndexScheduler {
             auth_path: self.auth_path.clone(),
             version_file_path: self.version_file_path.clone(),
             webhook_url: self.webhook_url.clone(),
+            webhook_authorization_header: self.webhook_authorization_header.clone(),
             currently_updating_index: self.currently_updating_index.clone(),
             embedders: self.embedders.clone(),
             #[cfg(test)]
@@ -497,6 +503,7 @@ impl IndexScheduler {
             auth_path: options.auth_path,
             version_file_path: options.version_file_path,
             webhook_url: options.webhook_url,
+            webhook_authorization_header: options.webhook_authorization_header,
             currently_updating_index: Arc::new(RwLock::new(None)),
             embedders: Default::default(),
 
@@ -1338,8 +1345,15 @@ impl IndexScheduler {
                 written: 0,
             };
 
+            // let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
             let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
-            if let Err(e) = ureq::post(url).set("Content-Encoding", "gzip").send(reader) {
+            let request = ureq::post(url).set("Content-Encoding", "gzip");
+            let request = match &self.webhook_authorization_header {
+                Some(header) => request.set("Authorization", header),
+                None => request,
+            };
+
+            if let Err(e) = request.send(reader) {
                 log::error!("While sending data to the webhook: {e}");
             }
         }
@@ -1761,6 +1775,7 @@ mod tests {
                 snapshots_path: tempdir.path().join("snapshots"),
                 dumps_path: tempdir.path().join("dumps"),
                 webhook_url: None,
+                webhook_authorization_header: None,
                 task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                 index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                 enable_mdb_writemap: false,
diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs
index 0cdb18540..86a5eddb9 100644
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -265,6 +265,7 @@ struct Infos {
     http_addr: bool,
     http_payload_size_limit: Byte,
     task_queue_webhook: bool,
+    task_webhook_authorization_header: bool,
     log_level: String,
     max_indexing_memory: MaxMemory,
     max_indexing_threads: MaxThreads,
@@ -292,6 +293,7 @@ impl From<Opt> for Infos {
             master_key: _,
             env,
             task_webhook_url,
+            task_webhook_authorization_header,
             max_index_size: _,
             max_task_db_size: _,
             http_payload_size_limit,
@@ -346,6 +348,7 @@ impl From<Opt> for Infos {
             http_payload_size_limit,
             experimental_max_number_of_batched_tasks,
             task_queue_webhook: task_webhook_url.is_some(),
+            task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
             log_level: log_level.to_string(),
             max_indexing_memory,
             max_indexing_threads,
diff --git a/meilisearch/src/lib.rs b/meilisearch/src/lib.rs
index 3698e5da4..f1111962c 100644
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -229,6 +229,7 @@ fn open_or_create_database_unchecked(
             snapshots_path: opt.snapshot_dir.clone(),
             dumps_path: opt.dump_dir.clone(),
             webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
+            webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
             task_db_size: opt.max_task_db_size.get_bytes() as usize,
             index_base_map_size: opt.max_index_size.get_bytes() as usize,
             enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs
index abb2bab6c..a0672c9cf 100644
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@@ -30,6 +30,7 @@ const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
 const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
 const MEILI_ENV: &str = "MEILI_ENV";
 const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
+const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
 #[cfg(feature = "analytics")]
 const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
 const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@@ -162,6 +163,10 @@ pub struct Opt {
     #[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
     pub task_webhook_url: Option<Url>,
 
+    /// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
+    #[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
+    pub task_webhook_authorization_header: Option<String>,
+
     /// Deactivates Meilisearch's built-in telemetry when provided.
     ///
     /// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@@ -382,6 +387,7 @@ impl Opt {
             master_key,
             env,
             task_webhook_url,
+            task_webhook_authorization_header,
             max_index_size: _,
             max_task_db_size: _,
             http_payload_size_limit,
@@ -419,6 +425,12 @@ impl Opt {
         if let Some(task_webhook_url) = task_webhook_url {
             export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
         }
+        if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
+            export_to_env_if_not_present(
+                MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
+                task_webhook_authorization_header,
+            );
+        }
 
         #[cfg(feature = "analytics")]
         {

From 333ce12eb2dcfa947c464934c3dfda59c38f2d6e Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 20 Dec 2023 10:17:49 +0100
Subject: [PATCH 16/41] Fixed issue where the default revision is always the
 one we picked for the default model

---
 milli/src/vector/settings.rs | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs
index 912cdf953..1826c040d 100644
--- a/milli/src/vector/settings.rs
+++ b/milli/src/vector/settings.rs
@@ -228,6 +228,12 @@ impl From<HfEmbedderSettings> for crate::vector::hf::EmbedderOptions {
         let mut this = Self::default();
         if let Some(model) = model.set() {
             this.model = model;
+            // Reset the revision if we are setting the model.
+            // This allows the following:
+            // "huggingFace": {} -> default model with default revision
+            // "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision
+            // "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision
+            this.revision = None;
         }
         if let Some(revision) = revision.set() {
             this.revision = Some(revision);

From e249e4db7bb7bb874dffb79ca1161f8448065c2d Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 20 Dec 2023 17:07:18 +0100
Subject: [PATCH 17/41] Change Setting::apply function signature

---
 milli/src/update/settings.rs | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index d406c121c..e47a5ad52 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -78,11 +78,19 @@ impl<T> Setting<T> {
         }
     }
 
-    pub fn apply(&mut self, new: Self) {
+    /// Returns `true` if applying the new setting changed this setting
+    pub fn apply(&mut self, new: Self) -> bool
+    where
+        T: PartialEq + Eq,
+    {
         if let Setting::NotSet = new {
-            return;
+            return false;
+        }
+        if self == &new {
+            return false;
         }
         *self = new;
+        true
     }
 }
 

From 393216bf30b2942122b9228abd392a27b55a880e Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 20 Dec 2023 17:08:32 +0100
Subject: [PATCH 18/41] Flatten embedders settings

---
 milli/src/vector/openai.rs   |   9 +-
 milli/src/vector/settings.rs | 464 ++++++++++++++++-------------------
 2 files changed, 213 insertions(+), 260 deletions(-)

diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs
index c11e6ddc6..53e8a041b 100644
--- a/milli/src/vector/openai.rs
+++ b/milli/src/vector/openai.rs
@@ -34,6 +34,9 @@ pub struct EmbedderOptions {
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(rename_all = camelCase, deny_unknown_fields)]
 pub enum EmbeddingModel {
+    // # WARNING
+    //
+    // If ever adding a model, make sure to add it to the list of supported models below.
     #[default]
     #[serde(rename = "text-embedding-ada-002")]
     #[deserr(rename = "text-embedding-ada-002")]
@@ -41,6 +44,10 @@ pub enum EmbeddingModel {
 }
 
 impl EmbeddingModel {
+    pub fn supported_models() -> &'static [&'static str] {
+        &["text-embedding-ada-002"]
+    }
+
     pub fn max_token(&self) -> usize {
         match self {
             EmbeddingModel::TextEmbeddingAda002 => 8191,
@@ -59,7 +66,7 @@ impl EmbeddingModel {
         }
     }
 
-    pub fn from_name(name: &'static str) -> Option<Self> {
+    pub fn from_name(name: &str) -> Option<Self> {
         match name {
             "text-embedding-ada-002" => Some(EmbeddingModel::TextEmbeddingAda002),
             _ => None,
diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs
index 1826c040d..945fc62c0 100644
--- a/milli/src/vector/settings.rs
+++ b/milli/src/vector/settings.rs
@@ -4,32 +4,189 @@ use serde::{Deserialize, Serialize};
 use crate::prompt::PromptData;
 use crate::update::Setting;
 use crate::vector::EmbeddingConfig;
+use crate::UserError;
 
 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(rename_all = camelCase, deny_unknown_fields)]
 pub struct EmbeddingSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set", rename = "source")]
-    #[deserr(default, rename = "source")]
-    pub embedder_options: Setting<EmbedderSettings>,
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default)]
-    pub document_template: Setting<PromptSettings>,
+    pub source: Setting<EmbedderSource>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub model: Setting<String>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub revision: Setting<String>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub api_key: Setting<String>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub dimensions: Setting<usize>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default)]
+    pub document_template: Setting<String>,
+}
+
+pub fn check_unset<T>(
+    key: &Setting<T>,
+    field: &'static str,
+    source: EmbedderSource,
+    embedder_name: &str,
+) -> Result<(), UserError> {
+    if matches!(key, Setting::NotSet) {
+        Ok(())
+    } else {
+        Err(UserError::InvalidFieldForSource {
+            embedder_name: embedder_name.to_owned(),
+            source_: source,
+            field,
+            allowed_fields_for_source: EmbeddingSettings::allowed_fields_for_source(source),
+            allowed_sources_for_field: EmbeddingSettings::allowed_sources_for_field(field),
+        })
+    }
+}
+
+pub fn check_set<T>(
+    key: &Setting<T>,
+    field: &'static str,
+    source: EmbedderSource,
+    embedder_name: &str,
+) -> Result<(), UserError> {
+    if matches!(key, Setting::Set(_)) {
+        Ok(())
+    } else {
+        Err(UserError::MissingFieldForSource {
+            field,
+            source_: source,
+            embedder_name: embedder_name.to_owned(),
+        })
+    }
+}
+
+impl EmbeddingSettings {
+    pub const SOURCE: &str = "source";
+    pub const MODEL: &str = "model";
+    pub const REVISION: &str = "revision";
+    pub const API_KEY: &str = "apiKey";
+    pub const DIMENSIONS: &str = "dimensions";
+    pub const DOCUMENT_TEMPLATE: &str = "documentTemplate";
+
+    pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] {
+        match field {
+            Self::SOURCE => {
+                &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided]
+            }
+            Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
+            Self::REVISION => &[EmbedderSource::HuggingFace],
+            Self::API_KEY => &[EmbedderSource::OpenAi],
+            Self::DIMENSIONS => &[EmbedderSource::UserProvided],
+            Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
+            _other => unreachable!("unknown field"),
+        }
+    }
+
+    pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] {
+        match source {
+            EmbedderSource::OpenAi => {
+                &[Self::SOURCE, Self::MODEL, Self::API_KEY, Self::DOCUMENT_TEMPLATE]
+            }
+            EmbedderSource::HuggingFace => {
+                &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
+            }
+            EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS],
+        }
+    }
+
+    pub(crate) fn apply_default_source(setting: &mut Setting<EmbeddingSettings>) {
+        if let Setting::Set(EmbeddingSettings {
+            source: source @ (Setting::NotSet | Setting::Reset),
+            ..
+        }) = setting
+        {
+            *source = Setting::Set(EmbedderSource::default())
+        }
+    }
+}
+
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
+#[serde(deny_unknown_fields, rename_all = "camelCase")]
+#[deserr(rename_all = camelCase, deny_unknown_fields)]
+pub enum EmbedderSource {
+    #[default]
+    OpenAi,
+    HuggingFace,
+    UserProvided,
+}
+
+impl std::fmt::Display for EmbedderSource {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let s = match self {
+            EmbedderSource::OpenAi => "openAi",
+            EmbedderSource::HuggingFace => "huggingFace",
+            EmbedderSource::UserProvided => "userProvided",
+        };
+        f.write_str(s)
+    }
 }
 
 impl EmbeddingSettings {
     pub fn apply(&mut self, new: Self) {
-        let EmbeddingSettings { embedder_options, document_template: prompt } = new;
-        self.embedder_options.apply(embedder_options);
-        self.document_template.apply(prompt);
+        let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
+            new;
+        let old_source = self.source;
+        self.source.apply(source);
+        // Reinitialize the whole setting object on a source change
+        if old_source != self.source {
+            *self = EmbeddingSettings {
+                source,
+                model,
+                revision,
+                api_key,
+                dimensions,
+                document_template,
+            };
+            return;
+        }
+
+        self.model.apply(model);
+        self.revision.apply(revision);
+        self.api_key.apply(api_key);
+        self.dimensions.apply(dimensions);
+        self.document_template.apply(document_template);
     }
 }
 
 impl From<EmbeddingConfig> for EmbeddingSettings {
     fn from(value: EmbeddingConfig) -> Self {
-        Self {
-            embedder_options: Setting::Set(value.embedder_options.into()),
-            document_template: Setting::Set(value.prompt.into()),
+        let EmbeddingConfig { embedder_options, prompt } = value;
+        match embedder_options {
+            super::EmbedderOptions::HuggingFace(options) => Self {
+                source: Setting::Set(EmbedderSource::HuggingFace),
+                model: Setting::Set(options.model),
+                revision: options.revision.map(Setting::Set).unwrap_or_default(),
+                api_key: Setting::NotSet,
+                dimensions: Setting::NotSet,
+                document_template: Setting::Set(prompt.template),
+            },
+            super::EmbedderOptions::OpenAi(options) => Self {
+                source: Setting::Set(EmbedderSource::OpenAi),
+                model: Setting::Set(options.embedding_model.name().to_owned()),
+                revision: Setting::NotSet,
+                api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
+                dimensions: Setting::NotSet,
+                document_template: Setting::Set(prompt.template),
+            },
+            super::EmbedderOptions::UserProvided(options) => Self {
+                source: Setting::Set(EmbedderSource::UserProvided),
+                model: Setting::NotSet,
+                revision: Setting::NotSet,
+                api_key: Setting::NotSet,
+                dimensions: Setting::Set(options.dimensions),
+                document_template: Setting::NotSet,
+            },
         }
     }
 }
@@ -37,262 +194,51 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
 impl From<EmbeddingSettings> for EmbeddingConfig {
     fn from(value: EmbeddingSettings) -> Self {
         let mut this = Self::default();
-        let EmbeddingSettings { embedder_options, document_template: prompt } = value;
-        if let Some(embedder_options) = embedder_options.set() {
-            this.embedder_options = embedder_options.into();
-        }
-        if let Some(prompt) = prompt.set() {
-            this.prompt = prompt.into();
-        }
-        this
-    }
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct PromptSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub template: Setting<String>,
-}
-
-impl PromptSettings {
-    pub fn apply(&mut self, new: Self) {
-        let PromptSettings { template } = new;
-        self.template.apply(template);
-    }
-}
-
-impl From<PromptData> for PromptSettings {
-    fn from(value: PromptData) -> Self {
-        Self { template: Setting::Set(value.template) }
-    }
-}
-
-impl From<PromptSettings> for PromptData {
-    fn from(value: PromptSettings) -> Self {
-        let mut this = PromptData::default();
-        let PromptSettings { template } = value;
-        if let Some(template) = template.set() {
-            this.template = template;
-        }
-        this
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-pub enum EmbedderSettings {
-    HuggingFace(Setting<HfEmbedderSettings>),
-    OpenAi(Setting<OpenAiEmbedderSettings>),
-    UserProvided(UserProvidedSettings),
-}
-
-impl<E> Deserr<E> for EmbedderSettings
-where
-    E: deserr::DeserializeError,
-{
-    fn deserialize_from_value<V: deserr::IntoValue>(
-        value: deserr::Value<V>,
-        location: deserr::ValuePointerRef,
-    ) -> Result<Self, E> {
-        match value {
-            deserr::Value::Map(map) => {
-                if deserr::Map::len(&map) != 1 {
-                    return Err(deserr::take_cf_content(E::error::<V>(
-                        None,
-                        deserr::ErrorKind::Unexpected {
-                            msg: format!(
-                                "Expected a single field, got {} fields",
-                                deserr::Map::len(&map)
-                            ),
-                        },
-                        location,
-                    )));
+        let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
+            value;
+        if let Some(source) = source.set() {
+            match source {
+                EmbedderSource::OpenAi => {
+                    let mut options = super::openai::EmbedderOptions::with_default_model(None);
+                    if let Some(model) = model.set() {
+                        if let Some(model) = super::openai::EmbeddingModel::from_name(&model) {
+                            options.embedding_model = model;
+                        }
+                    }
+                    if let Some(api_key) = api_key.set() {
+                        options.api_key = Some(api_key);
+                    }
+                    this.embedder_options = super::EmbedderOptions::OpenAi(options);
                 }
-                let mut it = deserr::Map::into_iter(map);
-                let (k, v) = it.next().unwrap();
-
-                match k.as_str() {
-                    "huggingFace" => Ok(EmbedderSettings::HuggingFace(Setting::Set(
-                        HfEmbedderSettings::deserialize_from_value(
-                            v.into_value(),
-                            location.push_key(&k),
-                        )?,
-                    ))),
-                    "openAi" => Ok(EmbedderSettings::OpenAi(Setting::Set(
-                        OpenAiEmbedderSettings::deserialize_from_value(
-                            v.into_value(),
-                            location.push_key(&k),
-                        )?,
-                    ))),
-                    "userProvided" => Ok(EmbedderSettings::UserProvided(
-                        UserProvidedSettings::deserialize_from_value(
-                            v.into_value(),
-                            location.push_key(&k),
-                        )?,
-                    )),
-                    other => Err(deserr::take_cf_content(E::error::<V>(
-                        None,
-                        deserr::ErrorKind::UnknownKey {
-                            key: other,
-                            accepted: &["huggingFace", "openAi", "userProvided"],
-                        },
-                        location,
-                    ))),
+                EmbedderSource::HuggingFace => {
+                    let mut options = super::hf::EmbedderOptions::default();
+                    if let Some(model) = model.set() {
+                        options.model = model;
+                        // Reset the revision if we are setting the model.
+                        // This allows the following:
+                        // "huggingFace": {} -> default model with default revision
+                        // "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision
+                        // "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision
+                        options.revision = None;
+                    }
+                    if let Some(revision) = revision.set() {
+                        options.revision = Some(revision);
+                    }
+                    this.embedder_options = super::EmbedderOptions::HuggingFace(options);
+                }
+                EmbedderSource::UserProvided => {
+                    this.embedder_options =
+                        super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions {
+                            dimensions: dimensions.set().unwrap(),
+                        });
                 }
             }
-            _ => Err(deserr::take_cf_content(E::error::<V>(
-                None,
-                deserr::ErrorKind::IncorrectValueKind {
-                    actual: value,
-                    accepted: &[deserr::ValueKind::Map],
-                },
-                location,
-            ))),
         }
-    }
-}
 
-impl Default for EmbedderSettings {
-    fn default() -> Self {
-        Self::OpenAi(Default::default())
-    }
-}
-
-impl From<crate::vector::EmbedderOptions> for EmbedderSettings {
-    fn from(value: crate::vector::EmbedderOptions) -> Self {
-        match value {
-            crate::vector::EmbedderOptions::HuggingFace(hf) => {
-                Self::HuggingFace(Setting::Set(hf.into()))
-            }
-            crate::vector::EmbedderOptions::OpenAi(openai) => {
-                Self::OpenAi(Setting::Set(openai.into()))
-            }
-            crate::vector::EmbedderOptions::UserProvided(user_provided) => {
-                Self::UserProvided(user_provided.into())
-            }
+        if let Setting::Set(template) = document_template {
+            this.prompt = PromptData { template }
         }
-    }
-}
 
-impl From<EmbedderSettings> for crate::vector::EmbedderOptions {
-    fn from(value: EmbedderSettings) -> Self {
-        match value {
-            EmbedderSettings::HuggingFace(Setting::Set(hf)) => Self::HuggingFace(hf.into()),
-            EmbedderSettings::HuggingFace(_setting) => Self::HuggingFace(Default::default()),
-            EmbedderSettings::OpenAi(Setting::Set(ai)) => Self::OpenAi(ai.into()),
-            EmbedderSettings::OpenAi(_setting) => {
-                Self::OpenAi(crate::vector::openai::EmbedderOptions::with_default_model(None))
-            }
-            EmbedderSettings::UserProvided(user_provided) => {
-                Self::UserProvided(user_provided.into())
-            }
-        }
-    }
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct HfEmbedderSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub model: Setting<String>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub revision: Setting<String>,
-}
-
-impl HfEmbedderSettings {
-    pub fn apply(&mut self, new: Self) {
-        let HfEmbedderSettings { model, revision } = new;
-        self.model.apply(model);
-        self.revision.apply(revision);
-    }
-}
-
-impl From<crate::vector::hf::EmbedderOptions> for HfEmbedderSettings {
-    fn from(value: crate::vector::hf::EmbedderOptions) -> Self {
-        Self {
-            model: Setting::Set(value.model),
-            revision: value.revision.map(Setting::Set).unwrap_or(Setting::NotSet),
-        }
-    }
-}
-
-impl From<HfEmbedderSettings> for crate::vector::hf::EmbedderOptions {
-    fn from(value: HfEmbedderSettings) -> Self {
-        let HfEmbedderSettings { model, revision } = value;
-        let mut this = Self::default();
-        if let Some(model) = model.set() {
-            this.model = model;
-            // Reset the revision if we are setting the model.
-            // This allows the following:
-            // "huggingFace": {} -> default model with default revision
-            // "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision
-            // "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision
-            this.revision = None;
-        }
-        if let Some(revision) = revision.set() {
-            this.revision = Some(revision);
-        }
         this
     }
 }
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct OpenAiEmbedderSettings {
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default)]
-    pub api_key: Setting<String>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set", rename = "model")]
-    #[deserr(default, rename = "model")]
-    pub embedding_model: Setting<crate::vector::openai::EmbeddingModel>,
-}
-
-impl OpenAiEmbedderSettings {
-    pub fn apply(&mut self, new: Self) {
-        let Self { api_key, embedding_model: embedding_mode } = new;
-        self.api_key.apply(api_key);
-        self.embedding_model.apply(embedding_mode);
-    }
-}
-
-impl From<crate::vector::openai::EmbedderOptions> for OpenAiEmbedderSettings {
-    fn from(value: crate::vector::openai::EmbedderOptions) -> Self {
-        Self {
-            api_key: value.api_key.map(Setting::Set).unwrap_or(Setting::Reset),
-            embedding_model: Setting::Set(value.embedding_model),
-        }
-    }
-}
-
-impl From<OpenAiEmbedderSettings> for crate::vector::openai::EmbedderOptions {
-    fn from(value: OpenAiEmbedderSettings) -> Self {
-        let OpenAiEmbedderSettings { api_key, embedding_model } = value;
-        Self { api_key: api_key.set(), embedding_model: embedding_model.set().unwrap_or_default() }
-    }
-}
-
-#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
-#[serde(deny_unknown_fields, rename_all = "camelCase")]
-#[deserr(rename_all = camelCase, deny_unknown_fields)]
-pub struct UserProvidedSettings {
-    pub dimensions: usize,
-}
-
-impl From<UserProvidedSettings> for crate::vector::manual::EmbedderOptions {
-    fn from(value: UserProvidedSettings) -> Self {
-        Self { dimensions: value.dimensions }
-    }
-}
-
-impl From<crate::vector::manual::EmbedderOptions> for UserProvidedSettings {
-    fn from(value: crate::vector::manual::EmbedderOptions) -> Self {
-        Self { dimensions: value.dimensions }
-    }
-}

From 14b396d302e04e0cd4758081097cadfdf2e2c1a0 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 20 Dec 2023 17:01:22 +0100
Subject: [PATCH 19/41] Add new errors

---
 meilisearch-types/src/error.rs |  5 ++++-
 milli/src/error.rs             | 29 ++++++++++++++++++++++++++++-
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs
index 62591e991..2182b1836 100644
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -344,7 +344,10 @@ impl ErrorCode for milli::Error {
                         Code::InvalidDocumentId
                     }
                     UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
-                    UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
+                    UserError::InvalidFieldForSource { .. }
+                    | UserError::MissingFieldForSource { .. }
+                    | UserError::InvalidOpenAiModel { .. }
+                    | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
                     UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
                     UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
                     UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
diff --git a/milli/src/error.rs b/milli/src/error.rs
index 9c5d8f416..539861e73 100644
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -192,7 +192,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
     MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
     #[error(transparent)]
     InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
-    #[error("Invalid prompt in for embeddings with name '{0}': {1}.")]
+    #[error("`.embedders.{0}.documentTemplate`: Invalid template: {1}.")]
     InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
     #[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
     TooManyEmbedders(usize),
@@ -200,6 +200,33 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
     InvalidEmbedder(String),
     #[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
     TooManyVectors(String, usize),
+    #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). Available fields: {}",
+        allowed_sources_for_field
+         .iter()
+         .map(|accepted| format!("`{}`", accepted))
+         .collect::<Vec<String>>()
+         .join(", "),
+        allowed_fields_for_source
+         .iter()
+         .map(|accepted| format!("`{}`", accepted))
+         .collect::<Vec<String>>()
+         .join(", ")
+    )]
+    InvalidFieldForSource {
+        embedder_name: String,
+        source_: crate::vector::settings::EmbedderSource,
+        field: &'static str,
+        allowed_fields_for_source: &'static [&'static str],
+        allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource],
+    },
+    #[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())]
+    InvalidOpenAiModel { embedder_name: String, model: String },
+    #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")]
+    MissingFieldForSource {
+        field: &'static str,
+        source_: crate::vector::settings::EmbedderSource,
+        embedder_name: String,
+    },
 }
 
 impl From<crate::vector::Error> for Error {

From 9123370e904beb9a96f31ba499604de29aa41d75 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 20 Dec 2023 17:12:34 +0100
Subject: [PATCH 20/41] Validate fused settings in settings task after fusing
 with existing setting

---
 milli/src/update/mod.rs      |  2 +-
 milli/src/update/settings.rs | 89 +++++++++++++++++++++++++++++++-----
 2 files changed, 78 insertions(+), 13 deletions(-)

diff --git a/milli/src/update/mod.rs b/milli/src/update/mod.rs
index eb2b6e69a..66c52a52f 100644
--- a/milli/src/update/mod.rs
+++ b/milli/src/update/mod.rs
@@ -8,7 +8,7 @@ pub use self::index_documents::{
     MergeFn,
 };
 pub use self::indexer_config::IndexerConfig;
-pub use self::settings::{Setting, Settings};
+pub use self::settings::{validate_embedding_settings, Setting, Settings};
 pub use self::update_step::UpdateIndexingStep;
 pub use self::word_prefix_docids::WordPrefixDocids;
 pub use self::words_prefix_integer_docids::WordPrefixIntegerDocids;
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index e47a5ad52..d770bcd74 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -17,7 +17,7 @@ use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS
 use crate::proximity::ProximityPrecision;
 use crate::update::index_documents::IndexDocumentsMethod;
 use crate::update::{IndexDocuments, UpdateIndexingStep};
-use crate::vector::settings::{EmbeddingSettings, PromptSettings};
+use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings};
 use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
 use crate::{FieldsIdsMap, Index, OrderBy, Result};
 
@@ -958,17 +958,23 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
                     .merge_join_by(configs.into_iter(), |(left, _), (right, _)| left.cmp(right))
                 {
                     match joined {
+                        // updated config
                         EitherOrBoth::Both((name, mut old), (_, new)) => {
-                            old.apply(new);
-                            let new = validate_prompt(&name, old)?;
-                            changed = true;
+                            changed |= old.apply(new);
+                            let new = validate_embedding_settings(old, &name)?;
                             new_configs.insert(name, new);
                         }
+                        // unchanged config
                         EitherOrBoth::Left((name, setting)) => {
                             new_configs.insert(name, setting);
                         }
-                        EitherOrBoth::Right((name, setting)) => {
-                            let setting = validate_prompt(&name, setting)?;
+                        // new config
+                        EitherOrBoth::Right((name, mut setting)) => {
+                            // apply the default source in case the source was not set so that it gets validated
+                            crate::vector::settings::EmbeddingSettings::apply_default_source(
+                                &mut setting,
+                            );
+                            let setting = validate_embedding_settings(setting, &name)?;
                             changed = true;
                             new_configs.insert(name, setting);
                         }
@@ -1080,8 +1086,12 @@ fn validate_prompt(
 ) -> Result<Setting<EmbeddingSettings>> {
     match new {
         Setting::Set(EmbeddingSettings {
-            embedder_options,
-            document_template: Setting::Set(PromptSettings { template: Setting::Set(template) }),
+            source,
+            model,
+            revision,
+            api_key,
+            dimensions,
+            document_template: Setting::Set(template),
         }) => {
             // validate
             let template = crate::prompt::Prompt::new(template)
@@ -1089,16 +1099,71 @@ fn validate_prompt(
                 .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
 
             Ok(Setting::Set(EmbeddingSettings {
-                embedder_options,
-                document_template: Setting::Set(PromptSettings {
-                    template: Setting::Set(template),
-                }),
+                source,
+                model,
+                revision,
+                api_key,
+                dimensions,
+                document_template: Setting::Set(template),
             }))
         }
         new => Ok(new),
     }
 }
 
+pub fn validate_embedding_settings(
+    settings: Setting<EmbeddingSettings>,
+    name: &str,
+) -> Result<Setting<EmbeddingSettings>> {
+    let settings = validate_prompt(name, settings)?;
+    let Setting::Set(settings) = settings else { return Ok(settings) };
+    let EmbeddingSettings { source, model, revision, api_key, dimensions, document_template } =
+        settings;
+    let Some(inferred_source) = source.set() else {
+        return Ok(Setting::Set(EmbeddingSettings {
+            source,
+            model,
+            revision,
+            api_key,
+            dimensions,
+            document_template,
+        }));
+    };
+    match inferred_source {
+        EmbedderSource::OpenAi => {
+            check_unset(&revision, "revision", inferred_source, name)?;
+            check_unset(&dimensions, "dimensions", inferred_source, name)?;
+            if let Setting::Set(model) = &model {
+                crate::vector::openai::EmbeddingModel::from_name(model.as_str()).ok_or(
+                    crate::error::UserError::InvalidOpenAiModel {
+                        embedder_name: name.to_owned(),
+                        model: model.clone(),
+                    },
+                )?;
+            }
+        }
+        EmbedderSource::HuggingFace => {
+            check_unset(&api_key, "apiKey", inferred_source, name)?;
+            check_unset(&dimensions, "dimensions", inferred_source, name)?;
+        }
+        EmbedderSource::UserProvided => {
+            check_unset(&model, "model", inferred_source, name)?;
+            check_unset(&revision, "revision", inferred_source, name)?;
+            check_unset(&api_key, "apiKey", inferred_source, name)?;
+            check_unset(&document_template, "documentTemplate", inferred_source, name)?;
+            check_set(&dimensions, "dimensions", inferred_source, name)?;
+        }
+    }
+    Ok(Setting::Set(EmbeddingSettings {
+        source,
+        model,
+        revision,
+        api_key,
+        dimensions,
+        document_template,
+    }))
+}
+
 #[cfg(test)]
 mod tests {
     use big_s::S;

From ec9649c922dcee54f2320e9a5e2f0b029063e773 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 20 Dec 2023 17:13:27 +0100
Subject: [PATCH 21/41] Add function to validate settings in Meilisearch, to be
 used in the routes

---
 meilisearch-types/src/settings.rs | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs
index b0dee69a3..244fbffa2 100644
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -318,6 +318,21 @@ impl Settings<Unchecked> {
             _kind: PhantomData,
         }
     }
+
+    pub fn validate(self) -> Result<Self, milli::Error> {
+        self.validate_embedding_settings()
+    }
+
+    fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
+        let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
+        for (name, config) in configs.iter_mut() {
+            let config_to_check = std::mem::take(config);
+            let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
+            *config = checked_config
+        }
+        self.embedders = Setting::Set(configs);
+        Ok(self)
+    }
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]

From 2e4c9651dfb973be86a053f1971a30170c752736 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 20 Dec 2023 17:13:55 +0100
Subject: [PATCH 22/41] Validate settings in route

---
 meilisearch/src/routes/indexes/settings.rs | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs
index 290cab2e0..feb174a1b 100644
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -90,6 +90,8 @@ macro_rules! make_setting_route {
                     ..Default::default()
                 };
 
+                let new_settings = new_settings.validate()?;
+
                 let allow_index_creation =
                     index_scheduler.filters().allow_index_creation(&index_uid);
 
@@ -582,13 +584,13 @@ fn embedder_analytics(
         for source in s
             .values()
             .filter_map(|config| config.clone().set())
-            .filter_map(|config| config.embedder_options.set())
+            .filter_map(|config| config.source.set())
         {
-            use meilisearch_types::milli::vector::settings::EmbedderSettings;
+            use meilisearch_types::milli::vector::settings::EmbedderSource;
             match source {
-                EmbedderSettings::OpenAi(_) => sources.insert("openAi"),
-                EmbedderSettings::HuggingFace(_) => sources.insert("huggingFace"),
-                EmbedderSettings::UserProvided(_) => sources.insert("userProvided"),
+                EmbedderSource::OpenAi => sources.insert("openAi"),
+                EmbedderSource::HuggingFace => sources.insert("huggingFace"),
+                EmbedderSource::UserProvided => sources.insert("userProvided"),
             };
         }
     };
@@ -651,6 +653,7 @@ pub async fn update_all(
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
     let new_settings = body.into_inner();
+    let new_settings = new_settings.validate()?;
 
     analytics.publish(
         "Settings Updated".to_string(),

From 6ff81de401608cbcfba3844e7d0a01a9c80ab82a Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 20 Dec 2023 17:06:50 +0100
Subject: [PATCH 23/41] Fix tests

---
 meilisearch/tests/search/hybrid.rs      |  6 +++---
 meilisearch/tests/search/mod.rs         | 14 +++++++++++---
 milli/src/update/index_documents/mod.rs | 18 ++++++++++--------
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs
index fb6fe297f..77a29d4a3 100644
--- a/meilisearch/tests/search/hybrid.rs
+++ b/meilisearch/tests/search/hybrid.rs
@@ -21,9 +21,9 @@ async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Inde
     "###);
 
     let (response, code) = index
-        .update_settings(
-            json!({ "embedders": {"default": {"source": {"userProvided": {"dimensions": 2}}}} }),
-        )
+        .update_settings(json!({ "embedders": {"default": {
+                "source": "userProvided",
+                "dimensions": 2}}} ))
         .await;
     assert_eq!(202, code, "{:?}", response);
     index.wait_task(response.uid()).await;
diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs
index 133a143fd..9b7b01029 100644
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -890,13 +890,21 @@ async fn experimental_feature_vector_store() {
     let (response, code) = index
         .update_settings(json!({"embedders": {
             "manual": {
-                "source": {
-                    "userProvided": {"dimensions": 3}
-                }
+                "source": "userProvided",
+                "dimensions": 3,
             }
         }}))
         .await;
 
+    meili_snap::snapshot!(response, @r###"
+    {
+      "taskUid": 1,
+      "indexUid": "test",
+      "status": "enqueued",
+      "type": "settingsUpdate",
+      "enqueuedAt": "[date]"
+    }
+    "###);
     meili_snap::snapshot!(code, @"202 Accepted");
     let response = index.wait_task(response.uid()).await;
 
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index ffc3f6b3a..738cfeb38 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -2553,7 +2553,7 @@ mod tests {
     /// Vectors must be of the same length.
     #[test]
     fn test_multiple_vectors() {
-        use crate::vector::settings::{EmbedderSettings, EmbeddingSettings};
+        use crate::vector::settings::EmbeddingSettings;
         let index = TempIndex::new();
 
         index
@@ -2562,9 +2562,11 @@ mod tests {
                 embedders.insert(
                     "manual".to_string(),
                     Setting::Set(EmbeddingSettings {
-                        embedder_options: Setting::Set(EmbedderSettings::UserProvided(
-                            crate::vector::settings::UserProvidedSettings { dimensions: 3 },
-                        )),
+                        source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided),
+                        model: Setting::NotSet,
+                        revision: Setting::NotSet,
+                        api_key: Setting::NotSet,
+                        dimensions: Setting::Set(3),
                         document_template: Setting::NotSet,
                     }),
                 );
@@ -2579,10 +2581,10 @@ mod tests {
             .unwrap();
         index.add_documents(documents!([{"id": 1, "_vectors": { "manual": [6, 7, 8] }}])).unwrap();
         index
-            .add_documents(
-                documents!([{"id": 2, "_vectors": { "manual": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }}]),
-            )
-            .unwrap();
+               .add_documents(
+                   documents!([{"id": 2, "_vectors": { "manual": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }}]),
+               )
+               .unwrap();
 
         let rtxn = index.read_txn().unwrap();
         let res = index.search(&rtxn).vector([0.0, 1.0, 2.0].to_vec()).execute().unwrap();

From 0bf879fb889711a1d57cbd5788a6bc790ac987f3 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 20 Dec 2023 17:48:09 +0100
Subject: [PATCH 24/41] Fix warning on rust stable

---
 milli/src/vector/settings.rs | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs
index 945fc62c0..37fb80452 100644
--- a/milli/src/vector/settings.rs
+++ b/milli/src/vector/settings.rs
@@ -67,12 +67,12 @@ pub fn check_set<T>(
 }
 
 impl EmbeddingSettings {
-    pub const SOURCE: &str = "source";
-    pub const MODEL: &str = "model";
-    pub const REVISION: &str = "revision";
-    pub const API_KEY: &str = "apiKey";
-    pub const DIMENSIONS: &str = "dimensions";
-    pub const DOCUMENT_TEMPLATE: &str = "documentTemplate";
+    pub const SOURCE: &'static str = "source";
+    pub const MODEL: &'static str = "model";
+    pub const REVISION: &'static str = "revision";
+    pub const API_KEY: &'static str = "apiKey";
+    pub const DIMENSIONS: &'static str = "dimensions";
+    pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
 
     pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] {
         match field {

From f52dee2b3b2b63f9dc1755e68e32e2053a268a91 Mon Sep 17 00:00:00 2001
From: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
Date: Thu, 21 Dec 2023 09:53:13 +0100
Subject: [PATCH 25/41] Update Cargo.toml

Update mini-dashboard with v0.2.12
---
 meilisearch/Cargo.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml
index c59b38fa6..ceadbb1f1 100644
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -153,5 +153,5 @@ greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]
 
 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
-sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.12/build.zip"
+sha1 = "acfe9a018c93eb0604ea87ee87bff7df5474e18e"

From ee54d3171e4d4c5b69fb093ea2aa5858a086efcb Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Thu, 21 Dec 2023 15:26:12 +0100
Subject: [PATCH 26/41] Check experimental feature at query time

---
 index-scheduler/src/batch.rs               |  3 ---
 meilisearch/src/routes/indexes/settings.rs | 17 +++++++++++++++--
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs
index c1c7af7ac..f6ed8392c 100644
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -1351,9 +1351,6 @@ impl IndexScheduler {
 
                 for (task, (_, settings)) in tasks.iter_mut().zip(settings) {
                     let checked_settings = settings.clone().check();
-                    if matches!(checked_settings.embedders, milli::update::Setting::Set(_)) {
-                        self.features().check_vector("Passing `embedders` in settings")?
-                    }
                     task.details = Some(Details::SettingsUpdate { settings: Box::new(settings) });
                     apply_settings_to_builder(&checked_settings, &mut builder);
 
diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs
index feb174a1b..3ab6afe07 100644
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -90,7 +90,10 @@ macro_rules! make_setting_route {
                     ..Default::default()
                 };
 
-                let new_settings = new_settings.validate()?;
+                let new_settings = $crate::routes::indexes::settings::validate_settings(
+                    new_settings,
+                    &index_scheduler,
+                )?;
 
                 let allow_index_creation =
                     index_scheduler.filters().allow_index_creation(&index_uid);
@@ -653,7 +656,7 @@ pub async fn update_all(
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
 
     let new_settings = body.into_inner();
-    let new_settings = new_settings.validate()?;
+    let new_settings = validate_settings(new_settings, &index_scheduler)?;
 
     analytics.publish(
         "Settings Updated".to_string(),
@@ -803,3 +806,13 @@ pub async fn delete_all(
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
 }
+
+fn validate_settings(
+    settings: Settings<Unchecked>,
+    index_scheduler: &IndexScheduler,
+) -> Result<Settings<Unchecked>, ResponseError> {
+    if matches!(settings.embedders, Setting::Set(_)) {
+        index_scheduler.features().check_vector("Passing `embedders` in settings")?
+    }
+    Ok(settings.validate()?)
+}

From 54ae6951eba871c0253c76992cc5030d7272903a Mon Sep 17 00:00:00 2001
From: Tamo <tamo@meilisearch.com>
Date: Tue, 2 Jan 2024 15:19:00 +0100
Subject: [PATCH 27/41] fix warning

---
 milli/src/update/index_documents/helpers/mod.rs | 2 +-
 milli/src/update/index_documents/mod.rs         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/milli/src/update/index_documents/helpers/mod.rs b/milli/src/update/index_documents/helpers/mod.rs
index 52638d6f6..1e29c0240 100644
--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@@ -16,7 +16,7 @@ pub use merge_functions::{
     keep_first, keep_latest_obkv, merge_btreeset_string, merge_cbo_roaring_bitmaps,
     merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
     merge_roaring_bitmaps, obkvs_keep_last_addition_merge_deletions,
-    obkvs_merge_additions_and_deletions, serialize_roaring_bitmap, MergeFn,
+    obkvs_merge_additions_and_deletions, MergeFn,
 };
 
 use crate::MAX_WORD_LENGTH;
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 738cfeb38..e797c0380 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -21,7 +21,7 @@ use slice_group_by::GroupBy;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
 
 use self::enrich::enrich_documents_batch;
-pub use self::enrich::{extract_finite_float_from_value, validate_geo_from_json, DocumentId};
+pub use self::enrich::{extract_finite_float_from_value, DocumentId};
 pub use self::helpers::{
     as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
     fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps,

From 94b9f3b310fec35ac3d215d2725e460bd5ff64af Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 3 Jan 2024 12:45:13 +0100
Subject: [PATCH 28/41] Add test

---
 meilisearch/tests/search/hybrid.rs | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs
index 77a29d4a3..79819cab2 100644
--- a/meilisearch/tests/search/hybrid.rs
+++ b/meilisearch/tests/search/hybrid.rs
@@ -56,6 +56,15 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
     }])
 });
 
+static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
+    json!([{
+            "title": "Shazam!",
+            "desc": "a Captain Marvel ersatz",
+            "id": "1",
+            "_vectors": {"default": [1.0, 3.0]},
+    }])
+});
+
 #[actix_rt::test]
 async fn simple_search() {
     let server = Server::new().await;
@@ -149,3 +158,18 @@ async fn invalid_semantic_ratio() {
     }
     "###);
 }
+
+#[actix_rt::test]
+async fn single_document() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
+
+    let (response, code) = index
+    .search_post(
+        json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
+    )
+    .await;
+
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
+}

From 12edc2c20a3ae0a0ead0ac301b3c51c47a803e0d Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Wed, 3 Jan 2024 15:59:37 +0100
Subject: [PATCH 29/41] Update arroy to a fixed version

---
 Cargo.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index 496710399..72701288f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -383,7 +383,7 @@ dependencies = [
 [[package]]
 name = "arroy"
 version = "0.1.0"
-source = "git+https://github.com/meilisearch/arroy.git#4f193fd534acd357b65bfe9eec4b3fed8ece2007"
+source = "git+https://github.com/meilisearch/arroy.git#d372648212e561a4845077cdb9239423d78655a2"
 dependencies = [
  "bytemuck",
  "byteorder",

From f75f22e0263c4e76913cae37b5e56d0fa424a267 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Mon, 8 Jan 2024 11:09:37 +0100
Subject: [PATCH 30/41] Display default value when proximityPrecision is not
 set

---
 meilisearch-types/src/settings.rs          | 20 ++++++++++++++++--
 meilisearch/tests/dumps/mod.rs             | 24 +++++++++++-----------
 meilisearch/tests/settings/get_settings.rs |  1 +
 3 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs
index 244fbffa2..7606fa44a 100644
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -37,6 +37,17 @@ where
     .serialize(s)
 }
 
+fn serialize_with_default<T, S>(field: &Setting<T>, s: S) -> std::result::Result<S::Ok, S::Error>
+where
+    T: Default + Serialize,
+    S: Serializer,
+{
+    match field {
+        Setting::Set(value) => value.serialize(s),
+        Setting::Reset | Setting::NotSet => T::default().serialize(s),
+    }
+}
+
 #[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
 pub struct Checked;
 
@@ -186,7 +197,11 @@ pub struct Settings<T> {
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
     pub distinct_attribute: Setting<String>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[serde(
+        default,
+        serialize_with = "serialize_with_default",
+        skip_serializing_if = "Setting::is_not_set"
+    )]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
     pub proximity_precision: Setting<ProximityPrecisionView>,
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
@@ -735,10 +750,11 @@ impl From<RankingRuleView> for Criterion {
     }
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
 #[serde(deny_unknown_fields, rename_all = "camelCase")]
 #[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
 pub enum ProximityPrecisionView {
+    #[default]
     ByWord,
     ByAttribute,
 }
diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs
index 5ad0ed247..90b9b91da 100644
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
@@ -59,7 +59,7 @@ async fn import_dump_v1_movie_raw() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -221,7 +221,7 @@ async fn import_dump_v1_movie_with_settings() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -369,7 +369,7 @@ async fn import_dump_v1_rubygems_with_settings() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -503,7 +503,7 @@ async fn import_dump_v2_movie_raw() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -649,7 +649,7 @@ async fn import_dump_v2_movie_with_settings() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -794,7 +794,7 @@ async fn import_dump_v2_rubygems_with_settings() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -928,7 +928,7 @@ async fn import_dump_v3_movie_raw() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -1074,7 +1074,7 @@ async fn import_dump_v3_movie_with_settings() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -1219,7 +1219,7 @@ async fn import_dump_v3_rubygems_with_settings() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -1353,7 +1353,7 @@ async fn import_dump_v4_movie_raw() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -1499,7 +1499,7 @@ async fn import_dump_v4_movie_with_settings() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
@@ -1644,7 +1644,7 @@ async fn import_dump_v4_rubygems_with_settings() {
       "dictionary": [],
       "synonyms": {},
       "distinctAttribute": null,
-      "proximityPrecision": null,
+      "proximityPrecision": "byWord",
       "typoTolerance": {
         "enabled": true,
         "minWordSizeForTypos": {
diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs
index 9ab53c51e..79689c1b5 100644
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -84,6 +84,7 @@ async fn get_settings() {
         })
     );
     assert_eq!(settings["embedders"], json!({}));
+    assert_eq!(settings["proximityPrecision"], json!("byWord"));
 }
 
 #[actix_rt::test]

From e27b850b0989609e7e97af8023e915168db57a55 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Mon, 8 Jan 2024 14:03:47 +0100
Subject: [PATCH 31/41] move the default display strategy on setting getter
 function

---
 meilisearch-types/src/settings.rs | 22 ++--------------------
 1 file changed, 2 insertions(+), 20 deletions(-)

diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs
index 7606fa44a..9613cb9f3 100644
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -37,17 +37,6 @@ where
     .serialize(s)
 }
 
-fn serialize_with_default<T, S>(field: &Setting<T>, s: S) -> std::result::Result<S::Ok, S::Error>
-where
-    T: Default + Serialize,
-    S: Serializer,
-{
-    match field {
-        Setting::Set(value) => value.serialize(s),
-        Setting::Reset | Setting::NotSet => T::default().serialize(s),
-    }
-}
-
 #[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
 pub struct Checked;
 
@@ -197,11 +186,7 @@ pub struct Settings<T> {
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
     pub distinct_attribute: Setting<String>,
-    #[serde(
-        default,
-        serialize_with = "serialize_with_default",
-        skip_serializing_if = "Setting::is_not_set"
-    )]
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
     pub proximity_precision: Setting<ProximityPrecisionView>,
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
@@ -641,10 +626,7 @@ pub fn settings(
             Some(field) => Setting::Set(field),
             None => Setting::Reset,
         },
-        proximity_precision: match proximity_precision {
-            Some(precision) => Setting::Set(precision),
-            None => Setting::Reset,
-        },
+        proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
         synonyms: Setting::Set(synonyms),
         typo_tolerance: Setting::Set(typo_tolerance),
         faceting: Setting::Set(faceting),

From 97bb1ff9e261e0802a0c19d4977d8b0f2fe9d999 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Tue, 9 Jan 2024 15:37:27 +0100
Subject: [PATCH 32/41] Move `currently_updating_index` to IndexMapper

---
 index-scheduler/src/batch.rs            |  4 ++--
 index-scheduler/src/index_mapper/mod.rs | 17 +++++++++++++++++
 index-scheduler/src/insta_snapshot.rs   |  1 -
 index-scheduler/src/lib.rs              | 15 +--------------
 4 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/index-scheduler/src/batch.rs b/index-scheduler/src/batch.rs
index f6ed8392c..97dc7a2bd 100644
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -936,8 +936,8 @@ impl IndexScheduler {
                 };
 
                 // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
-                *self.currently_updating_index.write().unwrap() =
-                    Some((index_uid.clone(), index.clone()));
+                self.index_mapper
+                    .set_currently_updating_index(Some((index_uid.clone(), index.clone())));
 
                 let mut index_wtxn = index.write_txn()?;
                 let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
diff --git a/index-scheduler/src/index_mapper/mod.rs b/index-scheduler/src/index_mapper/mod.rs
index 18aed42b0..58ec2bf11 100644
--- a/index-scheduler/src/index_mapper/mod.rs
+++ b/index-scheduler/src/index_mapper/mod.rs
@@ -69,6 +69,10 @@ pub struct IndexMapper {
     /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
     enable_mdb_writemap: bool,
     pub indexer_config: Arc<IndexerConfig>,
+
+    /// A few types of long running batches of tasks that act on a single index set this field
+    /// so that a handle to the index is available from other threads (search) in an optimized manner.
+    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
 }
 
 /// Whether the index is available for use or is forbidden to be inserted back in the index map
@@ -151,6 +155,7 @@ impl IndexMapper {
             index_growth_amount,
             enable_mdb_writemap,
             indexer_config: Arc::new(indexer_config),
+            currently_updating_index: Default::default(),
         })
     }
 
@@ -303,6 +308,14 @@ impl IndexMapper {
 
     /// Return an index, may open it if it wasn't already opened.
     pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
+        if let Some((current_name, current_index)) =
+            self.currently_updating_index.read().unwrap().as_ref()
+        {
+            if current_name == name {
+                return Ok(current_index.clone());
+            }
+        }
+
         let uuid = self
             .index_mapping
             .get(rtxn, name)?
@@ -474,4 +487,8 @@ impl IndexMapper {
     pub fn indexer_config(&self) -> &IndexerConfig {
         &self.indexer_config
     }
+
+    pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
+        *self.currently_updating_index.write().unwrap() = index;
+    }
 }
diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs
index 0adda43ff..42f041578 100644
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -42,7 +42,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
         test_breakpoint_sdr: _,
         planned_failures: _,
         run_loop_iteration: _,
-        currently_updating_index: _,
         embedders: _,
     } = scheduler;
 
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 296f8add1..a5b0cb5b0 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -351,10 +351,6 @@ pub struct IndexScheduler {
     /// The path to the version file of Meilisearch.
     pub(crate) version_file_path: PathBuf,
 
-    /// A few types of long running batches of tasks that act on a single index set this field
-    /// so that a handle to the index is available from other threads (search) in an optimized manner.
-    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
-
     embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
 
     // ================= test
@@ -403,7 +399,6 @@ impl IndexScheduler {
             version_file_path: self.version_file_path.clone(),
             webhook_url: self.webhook_url.clone(),
             webhook_authorization_header: self.webhook_authorization_header.clone(),
-            currently_updating_index: self.currently_updating_index.clone(),
             embedders: self.embedders.clone(),
             #[cfg(test)]
             test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
@@ -504,7 +499,6 @@ impl IndexScheduler {
             version_file_path: options.version_file_path,
             webhook_url: options.webhook_url,
             webhook_authorization_header: options.webhook_authorization_header,
-            currently_updating_index: Arc::new(RwLock::new(None)),
             embedders: Default::default(),
 
             #[cfg(test)]
@@ -688,13 +682,6 @@ impl IndexScheduler {
     /// If you need to fetch information from or perform an action on all indexes,
     /// see the `try_for_each_index` function.
     pub fn index(&self, name: &str) -> Result<Index> {
-        if let Some((current_name, current_index)) =
-            self.currently_updating_index.read().unwrap().as_ref()
-        {
-            if current_name == name {
-                return Ok(current_index.clone());
-            }
-        }
         let rtxn = self.env.read_txn()?;
         self.index_mapper.index(&rtxn, name)
     }
@@ -1175,7 +1162,7 @@ impl IndexScheduler {
         };
 
         // Reset the currently updating index to relinquish the index handle
-        *self.currently_updating_index.write().unwrap() = None;
+        self.index_mapper.set_currently_updating_index(None);
 
         #[cfg(test)]
         self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;

From 3f3462ab62abbf5d8af7621755ab2b7e87f1ea8c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= <clement@meilisearch.com>
Date: Wed, 10 Jan 2024 16:34:40 +0100
Subject: [PATCH 33/41] Limit the number of values returned by the facet search

---
 meilisearch/src/search.rs                |  3 +++
 meilisearch/tests/search/facet_search.rs | 18 ++++++++++++++++++
 milli/src/search/mod.rs                  | 22 +++++++++++++++++-----
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs
index b5dba8a58..399c7b95d 100644
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -735,6 +735,9 @@ pub fn perform_facet_search(
     if let Some(facet_query) = &facet_query {
         facet_search.query(facet_query);
     }
+    if let Some(max_facets) = index.max_values_per_facet(&rtxn)? {
+        facet_search.max_values(max_facets as usize);
+    }
 
     Ok(FacetSearchResult {
         facet_hits: facet_search.execute()?,
diff --git a/meilisearch/tests/search/facet_search.rs b/meilisearch/tests/search/facet_search.rs
index 8c1229f1a..5f9f631f9 100644
--- a/meilisearch/tests/search/facet_search.rs
+++ b/meilisearch/tests/search/facet_search.rs
@@ -105,6 +105,24 @@ async fn more_advanced_facet_search() {
     snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
 }
 
+#[actix_rt::test]
+async fn simple_facet_search_with_max_values() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
+    index.update_settings_filterable_attributes(json!(["genres"])).await;
+    index.add_documents(documents, None).await;
+    index.wait_task(2).await;
+
+    let (response, code) =
+        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
+
+    assert_eq!(code, 200, "{}", response);
+    assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1);
+}
+
 #[actix_rt::test]
 async fn non_filterable_facet_search_error() {
     let server = Server::new().await;
diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs
index 3e4849578..7bac5ea0c 100644
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -27,8 +27,8 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
 static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
 static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
 
-/// The maximum number of facets returned by the facet search route.
-const MAX_NUMBER_OF_FACETS: usize = 100;
+/// The maximum number of values per facet returned by the facet search route.
+const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;
 
 pub mod facet;
 mod fst_utils;
@@ -306,6 +306,7 @@ pub struct SearchForFacetValues<'a> {
     query: Option<String>,
     facet: String,
     search_query: Search<'a>,
+    max_values: usize,
     is_hybrid: bool,
 }
 
@@ -315,7 +316,13 @@ impl<'a> SearchForFacetValues<'a> {
         search_query: Search<'a>,
         is_hybrid: bool,
     ) -> SearchForFacetValues<'a> {
-        SearchForFacetValues { query: None, facet, search_query, is_hybrid }
+        SearchForFacetValues {
+            query: None,
+            facet,
+            search_query,
+            max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
+            is_hybrid,
+        }
     }
 
     pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
@@ -323,6 +330,11 @@ impl<'a> SearchForFacetValues<'a> {
         self
     }
 
+    pub fn max_values(&mut self, max: usize) -> &mut Self {
+        self.max_values = max;
+        self
+    }
+
     fn one_original_value_of(
         &self,
         field_id: FieldId,
@@ -462,7 +474,7 @@ impl<'a> SearchForFacetValues<'a> {
                             .unwrap_or_else(|| left_bound.to_string());
                         results.push(FacetValueHit { value, count });
                     }
-                    if results.len() >= MAX_NUMBER_OF_FACETS {
+                    if results.len() >= self.max_values {
                         break;
                     }
                 }
@@ -507,7 +519,7 @@ impl<'a> SearchForFacetValues<'a> {
                     .unwrap_or_else(|| query.to_string());
                 results.push(FacetValueHit { value, count });
             }
-            if results.len() >= MAX_NUMBER_OF_FACETS {
+            if results.len() >= self.max_values {
                 return Ok(ControlFlow::Break(()));
             }
         }

From 5f4fc6c95569b5b3c7aa81ffee3eea02d0100095 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Thu, 11 Jan 2024 09:44:16 +0100
Subject: [PATCH 34/41] Add timer logs

---
 milli/src/search/new/bucket_sort.rs            | 1 +
 milli/src/search/new/mod.rs                    | 2 ++
 milli/src/search/new/query_term/parse_query.rs | 1 +
 3 files changed, 4 insertions(+)

diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs
index b46f6124f..b439b87ec 100644
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -15,6 +15,7 @@ pub struct BucketSortOutput {
 
 // TODO: would probably be good to regroup some of these inside of a struct?
 #[allow(clippy::too_many_arguments)]
+#[logging_timer::time]
 pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
     ctx: &mut SearchContext<'ctx>,
     mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs
index 405b9747d..7b3b1d5b2 100644
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -191,6 +191,7 @@ fn resolve_maximally_reduced_query_graph(
     Ok(docids)
 }
 
+#[logging_timer::time]
 fn resolve_universe(
     ctx: &mut SearchContext,
     initial_universe: &RoaringBitmap,
@@ -556,6 +557,7 @@ pub fn execute_vector_search(
 }
 
 #[allow(clippy::too_many_arguments)]
+#[logging_timer::time]
 pub fn execute_search(
     ctx: &mut SearchContext,
     query: Option<&str>,
diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs
index 64fe07a31..865075d97 100644
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@@ -5,6 +5,7 @@ use super::*;
 use crate::{Result, SearchContext, MAX_WORD_LENGTH};
 
 /// Convert the tokenised search query into a list of located query terms.
+#[logging_timer::time]
 pub fn located_query_terms_from_tokens(
     ctx: &mut SearchContext,
     query: NormalizedTokenIter,

From 5f5a4868953e94f885654a6d2b8bfbd8afae3da0 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Thu, 11 Jan 2024 11:36:41 +0100
Subject: [PATCH 35/41] Reduce formatting time

---
 meilisearch/src/search.rs           | 23 +++++++++++++++++++----
 milli/src/search/new/matches/mod.rs |  6 +++++-
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs
index b5dba8a58..7e783aa42 100644
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -897,6 +897,14 @@ fn format_fields<'a>(
     let mut matches_position = compute_matches.then(BTreeMap::new);
     let mut document = document.clone();
 
+    // reduce the formated option list to the attributes that should be formatted,
+    // instead of all the attribute to display.
+    let formating_fields_options: Vec<_> = formatted_options
+        .iter()
+        .filter(|(_, option)| option.should_format())
+        .map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
+        .collect();
+
     // select the attributes to retrieve
     let displayable_names =
         displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
@@ -905,13 +913,15 @@ fn format_fields<'a>(
         // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
         // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
         // highlighted.
-        let format = formatted_options
+        // Warn: The time to compute the `format` list scales with the number of field to format;
+        // cummulated with `map_leaf_values` that iterates over all the nested fields, it gives a quadratic complexity:
+        // `d*f` where `d` is the total number of field to display and `f` the total number of field to format.
+        let format = formating_fields_options
             .iter()
-            .filter(|(field, _option)| {
-                let name = field_ids_map.name(**field).unwrap();
+            .filter(|(name, _option)| {
                 milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
             })
-            .map(|(_, option)| *option)
+            .map(|(_, option)| **option)
             .reduce(|acc, option| acc.merge(option));
         let mut infos = Vec::new();
 
@@ -941,6 +951,11 @@ fn format_value<'a>(
     infos: &mut Vec<MatchBounds>,
     compute_matches: bool,
 ) -> Value {
+    // early skip recursive function if nothing needs to be changed.
+    if !format_options.as_ref().map_or(false, FormatOptions::should_format) && !compute_matches {
+        return value;
+    }
+
     match value {
         Value::String(old_string) => {
             let mut matcher = builder.build(&old_string);
diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs
index 067fa1efd..8de1d9262 100644
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@@ -72,7 +72,7 @@ impl<'m> MatcherBuilder<'m> {
     }
 }
 
-#[derive(Copy, Clone, Default)]
+#[derive(Copy, Clone, Default, Debug)]
 pub struct FormatOptions {
     pub highlight: bool,
     pub crop: Option<usize>,
@@ -82,6 +82,10 @@ impl FormatOptions {
     pub fn merge(self, other: Self) -> Self {
         Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
     }
+
+    pub fn should_format(&self) -> bool {
+        self.highlight || self.crop.is_some()
+    }
 }
 
 #[derive(Clone, Debug)]

From 81b6128b29b612049c5579ca6fb9082d5f46a216 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Thu, 11 Jan 2024 12:28:32 +0100
Subject: [PATCH 36/41] Update tests

---
 meilisearch/tests/settings/tokenizer_customization.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs
index 4602e31f7..7f205eb30 100644
--- a/meilisearch/tests/settings/tokenizer_customization.rs
+++ b/meilisearch/tests/settings/tokenizer_customization.rs
@@ -94,7 +94,7 @@ async fn set_and_search() {
                 "id": 1,
                 "content": "Mac & cheese",
                 "_formatted": {
-                  "id": "1",
+                  "id": 1,
                   "content": "Mac <em>&</em> cheese"
                 }
               },
@@ -102,7 +102,7 @@ async fn set_and_search() {
                 "id": 3,
                 "content": "Mac&sep&&sepcheese",
                 "_formatted": {
-                  "id": "3",
+                  "id": 3,
                   "content": "Mac&sep<em>&</em>&sepcheese"
                 }
               }
@@ -254,7 +254,7 @@ async fn advanced_synergies() {
                 "id": 1,
                 "content": "J.R.R. Tolkien",
                 "_formatted": {
-                  "id": "1",
+                  "id": 1,
                   "content": "<em>J.R.R.</em> Tolkien"
                 }
               },
@@ -262,7 +262,7 @@ async fn advanced_synergies() {
                 "id": 2,
                 "content": "J. R. R. Tolkien",
                 "_formatted": {
-                  "id": "2",
+                  "id": 2,
                   "content": "<em>J. R. R.</em> Tolkien"
                 }
               },
@@ -270,7 +270,7 @@ async fn advanced_synergies() {
                 "id": 3,
                 "content": "jrr Tolkien",
                 "_formatted": {
-                  "id": "3",
+                  "id": 3,
                   "content": "<em>jrr</em> Tolkien"
                 }
               }

From 86270e687837fcafc2730b017c29a01d3d2e6613 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Thu, 11 Jan 2024 12:43:56 +0100
Subject: [PATCH 37/41] Transform fields contained into _format into strings

---
 meilisearch/src/search.rs                             |  7 +------
 meilisearch/tests/settings/tokenizer_customization.rs | 10 +++++-----
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs
index 7e783aa42..6893081d4 100644
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -951,11 +951,6 @@ fn format_value<'a>(
     infos: &mut Vec<MatchBounds>,
     compute_matches: bool,
 ) -> Value {
-    // early skip recursive function if nothing needs to be changed.
-    if !format_options.as_ref().map_or(false, FormatOptions::should_format) && !compute_matches {
-        return value;
-    }
-
     match value {
         Value::String(old_string) => {
             let mut matcher = builder.build(&old_string);
@@ -1023,7 +1018,7 @@ fn format_value<'a>(
                     let value = matcher.format(format_options);
                     Value::String(value.into_owned())
                 }
-                None => Value::Number(number),
+                None => Value::String(s),
             }
         }
         value => value,
diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs
index 7f205eb30..4602e31f7 100644
--- a/meilisearch/tests/settings/tokenizer_customization.rs
+++ b/meilisearch/tests/settings/tokenizer_customization.rs
@@ -94,7 +94,7 @@ async fn set_and_search() {
                 "id": 1,
                 "content": "Mac & cheese",
                 "_formatted": {
-                  "id": 1,
+                  "id": "1",
                   "content": "Mac <em>&</em> cheese"
                 }
               },
@@ -102,7 +102,7 @@ async fn set_and_search() {
                 "id": 3,
                 "content": "Mac&sep&&sepcheese",
                 "_formatted": {
-                  "id": 3,
+                  "id": "3",
                   "content": "Mac&sep<em>&</em>&sepcheese"
                 }
               }
@@ -254,7 +254,7 @@ async fn advanced_synergies() {
                 "id": 1,
                 "content": "J.R.R. Tolkien",
                 "_formatted": {
-                  "id": 1,
+                  "id": "1",
                   "content": "<em>J.R.R.</em> Tolkien"
                 }
               },
@@ -262,7 +262,7 @@ async fn advanced_synergies() {
                 "id": 2,
                 "content": "J. R. R. Tolkien",
                 "_formatted": {
-                  "id": 2,
+                  "id": "2",
                   "content": "<em>J. R. R.</em> Tolkien"
                 }
               },
@@ -270,7 +270,7 @@ async fn advanced_synergies() {
                 "id": 3,
                 "content": "jrr Tolkien",
                 "_formatted": {
-                  "id": 3,
+                  "id": "3",
                   "content": "<em>jrr</em> Tolkien"
                 }
               }

From b79b03d4e2b8cb3261bd2b5add1c75870c92756c Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Thu, 11 Jan 2024 13:14:51 +0100
Subject: [PATCH 38/41] Fix proximity precision telemetry

---
 meilisearch/src/routes/indexes/settings.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs
index 3ab6afe07..decc4ffc9 100644
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -458,7 +458,7 @@ make_setting_route!(
             json!({
                 "proximity_precision": {
                     "set": precision.is_some(),
-                    "value": precision,
+                    "value": precision.unwrap_or_default(),
                 }
             }),
             Some(req),
@@ -690,7 +690,8 @@ pub async fn update_all(
                 "set": new_settings.distinct_attribute.as_ref().set().is_some()
             },
             "proximity_precision": {
-                "set": new_settings.proximity_precision.as_ref().set().is_some()
+                "set": new_settings.proximity_precision.as_ref().set().is_some(),
+                "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
             },
             "typo_tolerance": {
                 "enabled": new_settings.typo_tolerance

From 95f8e2153394f3089602b1ffa16b3548c520cc98 Mon Sep 17 00:00:00 2001
From: ManyTheFish <many@meilisearch.com>
Date: Thu, 11 Jan 2024 15:07:08 +0100
Subject: [PATCH 39/41] fix typos

---
 meilisearch/src/search.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs
index 6893081d4..28ca5a6e1 100644
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -897,9 +897,9 @@ fn format_fields<'a>(
     let mut matches_position = compute_matches.then(BTreeMap::new);
     let mut document = document.clone();
 
-    // reduce the formated option list to the attributes that should be formatted,
-    // instead of all the attribute to display.
-    let formating_fields_options: Vec<_> = formatted_options
+    // reduce the formatted option list to the attributes that should be formatted,
+    // instead of all the attributes to display.
+    let formatting_fields_options: Vec<_> = formatted_options
         .iter()
         .filter(|(_, option)| option.should_format())
         .map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
@@ -913,10 +913,10 @@ fn format_fields<'a>(
         // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
         // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
         // highlighted.
-        // Warn: The time to compute the `format` list scales with the number of field to format;
-        // cummulated with `map_leaf_values` that iterates over all the nested fields, it gives a quadratic complexity:
-        // `d*f` where `d` is the total number of field to display and `f` the total number of field to format.
-        let format = formating_fields_options
+        // Warn: The time to compute the format list scales with the number of fields to format;
+        // cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
+        // d*f where d is the total number of fields to display and f is the total number of fields to format.
+        let format = formatting_fields_options
             .iter()
             .filter(|(name, _option)| {
                 milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)

From 84a5c304fc78d85652897182cc66a85a454c0579 Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Thu, 11 Jan 2024 21:35:06 +0100
Subject: [PATCH 40/41] Don't display the embedders setting when it is an empty
 dict

---
 meilisearch-types/src/settings.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs
index 9613cb9f3..ca46abb0c 100644
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -600,11 +600,12 @@ pub fn settings(
         ),
     };
 
-    let embedders = index
+    let embedders: BTreeMap<_, _> = index
         .embedding_configs(rtxn)?
         .into_iter()
         .map(|(name, config)| (name, Setting::Set(config.into())))
         .collect();
+    let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
 
     Ok(Settings {
         displayed_attributes: match displayed_attributes {
@@ -631,7 +632,7 @@ pub fn settings(
         typo_tolerance: Setting::Set(typo_tolerance),
         faceting: Setting::Set(faceting),
         pagination: Setting::Set(pagination),
-        embedders: Setting::Set(embedders),
+        embedders,
         _kind: PhantomData,
     })
 }

From 38abfec611142ae97e54932a9238b6941032ac0b Mon Sep 17 00:00:00 2001
From: Louis Dureuil <louis@meilisearch.com>
Date: Thu, 11 Jan 2024 21:35:30 +0100
Subject: [PATCH 41/41] Fix tests

---
 meilisearch/tests/dumps/mod.rs             | 39 ++++++++--------------
 meilisearch/tests/settings/get_settings.rs |  3 +-
 2 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs
index 90b9b91da..fd34268a5 100644
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
@@ -77,8 +77,7 @@ async fn import_dump_v1_movie_raw() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -239,8 +238,7 @@ async fn import_dump_v1_movie_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -387,8 +385,7 @@ async fn import_dump_v1_rubygems_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -521,8 +518,7 @@ async fn import_dump_v2_movie_raw() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -667,8 +663,7 @@ async fn import_dump_v2_movie_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -812,8 +807,7 @@ async fn import_dump_v2_rubygems_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -946,8 +940,7 @@ async fn import_dump_v3_movie_raw() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -1092,8 +1085,7 @@ async fn import_dump_v3_movie_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -1237,8 +1229,7 @@ async fn import_dump_v3_rubygems_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -1371,8 +1362,7 @@ async fn import_dump_v4_movie_raw() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -1517,8 +1507,7 @@ async fn import_dump_v4_movie_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -1662,8 +1651,7 @@ async fn import_dump_v4_rubygems_with_settings() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###
     );
@@ -1907,8 +1895,7 @@ async fn import_dump_v6_containing_experimental_features() {
       },
       "pagination": {
         "maxTotalHits": 1000
-      },
-      "embedders": {}
+      }
     }
     "###);
 
diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs
index 79689c1b5..5642e854f 100644
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -54,7 +54,7 @@ async fn get_settings() {
     let (response, code) = index.settings().await;
     assert_eq!(code, 200);
     let settings = response.as_object().unwrap();
-    assert_eq!(settings.keys().len(), 16);
+    assert_eq!(settings.keys().len(), 15);
     assert_eq!(settings["displayedAttributes"], json!(["*"]));
     assert_eq!(settings["searchableAttributes"], json!(["*"]));
     assert_eq!(settings["filterableAttributes"], json!([]));
@@ -83,7 +83,6 @@ async fn get_settings() {
             "maxTotalHits": 1000,
         })
     );
-    assert_eq!(settings["embedders"], json!({}));
     assert_eq!(settings["proximityPrecision"], json!("byWord"));
 }